Repository: google/riegeli
Branch: master
Commit: a0a8dac780d1
Files: 498
Total size: 5.0 MB
Directory structure:
gitextract_1atzokxc/
├── .bazelrc
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── MODULE.bazel
├── README.md
├── configure
├── doc/
│ ├── index.md
│ ├── record_writer_options.md
│ └── riegeli_records_file_format.md
├── python/
│ ├── BUILD
│ ├── MANIFEST.in
│ ├── README.md
│ ├── __init__.py
│ ├── build_pip_package.sh
│ ├── dummy_binary.py
│ ├── riegeli/
│ │ ├── BUILD
│ │ ├── BUILD.tpl
│ │ ├── __init__.py
│ │ ├── base/
│ │ │ ├── BUILD
│ │ │ ├── __init__.py
│ │ │ ├── riegeli_error.py
│ │ │ ├── utils.cc
│ │ │ └── utils.h
│ │ ├── bytes/
│ │ │ ├── BUILD
│ │ │ ├── python_reader.cc
│ │ │ ├── python_reader.h
│ │ │ ├── python_writer.cc
│ │ │ └── python_writer.h
│ │ ├── py_extension.bzl
│ │ ├── python_configure.bzl
│ │ ├── records/
│ │ │ ├── BUILD
│ │ │ ├── __init__.py
│ │ │ ├── examples/
│ │ │ │ ├── BUILD
│ │ │ │ ├── __init__.py
│ │ │ │ └── write_read_records.py
│ │ │ ├── record_position.cc
│ │ │ ├── record_position.h
│ │ │ ├── record_reader.cc
│ │ │ ├── record_writer.cc
│ │ │ ├── records_metadata.proto
│ │ │ ├── skipped_region.py
│ │ │ └── tests/
│ │ │ ├── BUILD
│ │ │ ├── __init__.py
│ │ │ ├── records_test.proto
│ │ │ └── records_test.py
│ │ └── tensorflow/
│ │ ├── BUILD
│ │ ├── __init__.py
│ │ ├── kernel_tests/
│ │ │ ├── __init__.py
│ │ │ └── riegeli_dataset_test.py
│ │ └── ops/
│ │ ├── __init__.py
│ │ └── riegeli_dataset_ops.py
│ └── setup.py
├── riegeli/
│ ├── .gitignore
│ ├── BUILD
│ ├── base/
│ │ ├── BUILD
│ │ ├── any.h
│ │ ├── any_initializer.h
│ │ ├── any_internal.h
│ │ ├── arithmetic.h
│ │ ├── assert.cc
│ │ ├── assert.h
│ │ ├── background_cleaning.cc
│ │ ├── background_cleaning.h
│ │ ├── binary_search.h
│ │ ├── buffer.cc
│ │ ├── buffer.h
│ │ ├── buffering.h
│ │ ├── byte_fill.cc
│ │ ├── byte_fill.h
│ │ ├── bytes_ref.h
│ │ ├── c_string_ref.h
│ │ ├── chain.cc
│ │ ├── chain.h
│ │ ├── chain_base.h
│ │ ├── chain_details.h
│ │ ├── closing_ptr.h
│ │ ├── compact_string.cc
│ │ ├── compact_string.h
│ │ ├── compare.h
│ │ ├── constexpr.h
│ │ ├── cord_iterator_span.cc
│ │ ├── cord_iterator_span.h
│ │ ├── cord_utils.cc
│ │ ├── cord_utils.h
│ │ ├── debug.cc
│ │ ├── debug.h
│ │ ├── dependency.h
│ │ ├── dependency_base.h
│ │ ├── dependency_manager.h
│ │ ├── errno_mapping.cc
│ │ ├── errno_mapping.h
│ │ ├── estimated_allocated_size.h
│ │ ├── external_data.cc
│ │ ├── external_data.h
│ │ ├── external_ref.h
│ │ ├── external_ref_base.h
│ │ ├── external_ref_support.h
│ │ ├── global.h
│ │ ├── hybrid_direct_common.h
│ │ ├── hybrid_direct_internal.h
│ │ ├── hybrid_direct_map.h
│ │ ├── hybrid_direct_set.h
│ │ ├── initializer.h
│ │ ├── initializer_internal.h
│ │ ├── intrusive_shared_ptr.h
│ │ ├── invoker.h
│ │ ├── iterable.h
│ │ ├── maker.h
│ │ ├── memory_estimator.cc
│ │ ├── memory_estimator.h
│ │ ├── moving_dependency.h
│ │ ├── new_aligned.h
│ │ ├── null_safe_memcpy.h
│ │ ├── object.cc
│ │ ├── object.h
│ │ ├── optional_compact_string.h
│ │ ├── options_parser.cc
│ │ ├── options_parser.h
│ │ ├── ownership.h
│ │ ├── parallelism.cc
│ │ ├── parallelism.h
│ │ ├── port.h
│ │ ├── recycling_pool.h
│ │ ├── ref_count.h
│ │ ├── reset.h
│ │ ├── shared_buffer.cc
│ │ ├── shared_buffer.h
│ │ ├── shared_ptr.h
│ │ ├── sized_shared_buffer.cc
│ │ ├── sized_shared_buffer.h
│ │ ├── stable_dependency.h
│ │ ├── status.cc
│ │ ├── status.h
│ │ ├── stream_utils.cc
│ │ ├── stream_utils.h
│ │ ├── string_ref.h
│ │ ├── string_utils.cc
│ │ ├── string_utils.h
│ │ ├── temporary_storage.h
│ │ ├── type_erased_ref.h
│ │ ├── type_id.h
│ │ ├── type_traits.h
│ │ ├── types.h
│ │ ├── unicode.cc
│ │ ├── unicode.h
│ │ └── uninitialized_vector.h
│ ├── brotli/
│ │ ├── BUILD
│ │ ├── brotli_allocator.cc
│ │ ├── brotli_allocator.h
│ │ ├── brotli_dictionary.cc
│ │ ├── brotli_dictionary.h
│ │ ├── brotli_reader.cc
│ │ ├── brotli_reader.h
│ │ ├── brotli_writer.cc
│ │ └── brotli_writer.h
│ ├── bytes/
│ │ ├── BUILD
│ │ ├── array_backward_writer.cc
│ │ ├── array_backward_writer.h
│ │ ├── array_writer.cc
│ │ ├── array_writer.h
│ │ ├── backward_writer.cc
│ │ ├── backward_writer.h
│ │ ├── buffer_options.cc
│ │ ├── buffer_options.h
│ │ ├── buffered_reader.cc
│ │ ├── buffered_reader.h
│ │ ├── buffered_writer.cc
│ │ ├── buffered_writer.h
│ │ ├── cfile_handle.cc
│ │ ├── cfile_handle.h
│ │ ├── cfile_internal.cc
│ │ ├── cfile_internal.h
│ │ ├── cfile_internal_for_cc.h
│ │ ├── cfile_reader.cc
│ │ ├── cfile_reader.h
│ │ ├── cfile_writer.cc
│ │ ├── cfile_writer.h
│ │ ├── chain_backward_writer.cc
│ │ ├── chain_backward_writer.h
│ │ ├── chain_reader.cc
│ │ ├── chain_reader.h
│ │ ├── chain_writer.cc
│ │ ├── chain_writer.h
│ │ ├── compact_string_writer.h
│ │ ├── copy_all.cc
│ │ ├── copy_all.h
│ │ ├── cord_backward_writer.cc
│ │ ├── cord_backward_writer.h
│ │ ├── cord_reader.cc
│ │ ├── cord_reader.h
│ │ ├── cord_writer.cc
│ │ ├── cord_writer.h
│ │ ├── fd_handle.cc
│ │ ├── fd_handle.h
│ │ ├── fd_internal.cc
│ │ ├── fd_internal.h
│ │ ├── fd_internal_for_cc.h
│ │ ├── fd_mmap_reader.cc
│ │ ├── fd_mmap_reader.h
│ │ ├── fd_reader.cc
│ │ ├── fd_reader.h
│ │ ├── fd_writer.cc
│ │ ├── fd_writer.h
│ │ ├── file_mode_string.cc
│ │ ├── file_mode_string.h
│ │ ├── iostream_internal.h
│ │ ├── istream_reader.cc
│ │ ├── istream_reader.h
│ │ ├── joining_reader.cc
│ │ ├── joining_reader.h
│ │ ├── limiting_backward_writer.cc
│ │ ├── limiting_backward_writer.h
│ │ ├── limiting_reader.cc
│ │ ├── limiting_reader.h
│ │ ├── limiting_writer.cc
│ │ ├── limiting_writer.h
│ │ ├── null_backward_writer.cc
│ │ ├── null_backward_writer.h
│ │ ├── null_writer.cc
│ │ ├── null_writer.h
│ │ ├── ostream_writer.cc
│ │ ├── ostream_writer.h
│ │ ├── path_ref.h
│ │ ├── position_shifting_backward_writer.cc
│ │ ├── position_shifting_backward_writer.h
│ │ ├── position_shifting_reader.cc
│ │ ├── position_shifting_reader.h
│ │ ├── position_shifting_writer.cc
│ │ ├── position_shifting_writer.h
│ │ ├── prefix_limiting_backward_writer.cc
│ │ ├── prefix_limiting_backward_writer.h
│ │ ├── prefix_limiting_reader.cc
│ │ ├── prefix_limiting_reader.h
│ │ ├── prefix_limiting_writer.cc
│ │ ├── prefix_limiting_writer.h
│ │ ├── pullable_reader.cc
│ │ ├── pullable_reader.h
│ │ ├── pushable_backward_writer.cc
│ │ ├── pushable_backward_writer.h
│ │ ├── pushable_writer.cc
│ │ ├── pushable_writer.h
│ │ ├── read_all.cc
│ │ ├── read_all.h
│ │ ├── reader.cc
│ │ ├── reader.h
│ │ ├── reader_cfile.cc
│ │ ├── reader_cfile.h
│ │ ├── reader_factory.cc
│ │ ├── reader_factory.h
│ │ ├── reader_istream.cc
│ │ ├── reader_istream.h
│ │ ├── resizable_writer.cc
│ │ ├── resizable_writer.h
│ │ ├── restricted_chain_writer.cc
│ │ ├── restricted_chain_writer.h
│ │ ├── splitting_writer.cc
│ │ ├── splitting_writer.h
│ │ ├── std_io.cc
│ │ ├── std_io.h
│ │ ├── string_reader.cc
│ │ ├── string_reader.h
│ │ ├── string_writer.cc
│ │ ├── string_writer.h
│ │ ├── stringify.h
│ │ ├── stringify_writer.h
│ │ ├── vector_writer.h
│ │ ├── wrapping_backward_writer.cc
│ │ ├── wrapping_backward_writer.h
│ │ ├── wrapping_reader.cc
│ │ ├── wrapping_reader.h
│ │ ├── wrapping_writer.cc
│ │ ├── wrapping_writer.h
│ │ ├── write.h
│ │ ├── write_int_internal.cc
│ │ ├── write_int_internal.h
│ │ ├── writer.cc
│ │ ├── writer.h
│ │ ├── writer_cfile.cc
│ │ ├── writer_cfile.h
│ │ ├── writer_ostream.cc
│ │ └── writer_ostream.h
│ ├── bzip2/
│ │ ├── BUILD
│ │ ├── bzip2_error.cc
│ │ ├── bzip2_error.h
│ │ ├── bzip2_reader.cc
│ │ ├── bzip2_reader.h
│ │ ├── bzip2_writer.cc
│ │ └── bzip2_writer.h
│ ├── chunk_encoding/
│ │ ├── BUILD
│ │ ├── README.md
│ │ ├── brotli_encoder_selection.cc
│ │ ├── brotli_encoder_selection.h
│ │ ├── chunk.cc
│ │ ├── chunk.h
│ │ ├── chunk_decoder.cc
│ │ ├── chunk_decoder.h
│ │ ├── chunk_encoder.cc
│ │ ├── chunk_encoder.h
│ │ ├── compressor.cc
│ │ ├── compressor.h
│ │ ├── compressor_options.cc
│ │ ├── compressor_options.h
│ │ ├── constants.h
│ │ ├── decompressor.cc
│ │ ├── decompressor.h
│ │ ├── deferred_encoder.cc
│ │ ├── deferred_encoder.h
│ │ ├── field_projection.h
│ │ ├── hash.cc
│ │ ├── hash.h
│ │ ├── simple_decoder.cc
│ │ ├── simple_decoder.h
│ │ ├── simple_encoder.cc
│ │ ├── simple_encoder.h
│ │ ├── transpose_decoder.cc
│ │ ├── transpose_decoder.h
│ │ ├── transpose_encoder.cc
│ │ ├── transpose_encoder.h
│ │ └── transpose_internal.h
│ ├── containers/
│ │ ├── BUILD
│ │ ├── chunked_sorted_string_set.cc
│ │ ├── chunked_sorted_string_set.h
│ │ ├── linear_sorted_string_set.cc
│ │ └── linear_sorted_string_set.h
│ ├── csv/
│ │ ├── BUILD
│ │ ├── csv_reader.cc
│ │ ├── csv_reader.h
│ │ ├── csv_record.cc
│ │ ├── csv_record.h
│ │ ├── csv_writer.cc
│ │ └── csv_writer.h
│ ├── digests/
│ │ ├── BUILD
│ │ ├── adler32_digester.cc
│ │ ├── adler32_digester.h
│ │ ├── crc32_digester.cc
│ │ ├── crc32_digester.h
│ │ ├── crc32c_digester.h
│ │ ├── digest_converter.h
│ │ ├── digester_handle.cc
│ │ ├── digester_handle.h
│ │ ├── digesting_reader.cc
│ │ ├── digesting_reader.h
│ │ ├── digesting_writer.cc
│ │ ├── digesting_writer.h
│ │ ├── highwayhash_digester.cc
│ │ ├── highwayhash_digester.h
│ │ ├── md5_digester.h
│ │ ├── openssl_digester.h
│ │ ├── sha1_digester.h
│ │ ├── sha256_digester.h
│ │ ├── sha512_256_digester.h
│ │ ├── sha512_digester.h
│ │ └── wrapping_digester.h
│ ├── endian/
│ │ ├── BUILD
│ │ ├── endian_reading.h
│ │ └── endian_writing.h
│ ├── gcs/
│ │ ├── BUILD
│ │ ├── gcs_internal.h
│ │ ├── gcs_object.cc
│ │ ├── gcs_object.h
│ │ ├── gcs_reader.cc
│ │ ├── gcs_reader.h
│ │ ├── gcs_writer.cc
│ │ └── gcs_writer.h
│ ├── lines/
│ │ ├── BUILD
│ │ ├── line_reading.cc
│ │ ├── line_reading.h
│ │ ├── line_writing.h
│ │ ├── newline.h
│ │ ├── text_reader.cc
│ │ ├── text_reader.h
│ │ ├── text_writer.cc
│ │ └── text_writer.h
│ ├── lz4/
│ │ ├── BUILD
│ │ ├── lz4_dictionary.cc
│ │ ├── lz4_dictionary.h
│ │ ├── lz4_reader.cc
│ │ ├── lz4_reader.h
│ │ ├── lz4_writer.cc
│ │ └── lz4_writer.h
│ ├── messages/
│ │ ├── BUILD
│ │ ├── context_projection.h
│ │ ├── dynamic_field_handler.h
│ │ ├── field_copier.h
│ │ ├── field_handler_map.h
│ │ ├── field_handlers.cc
│ │ ├── field_handlers.h
│ │ ├── map_entry_field.h
│ │ ├── message_wire_format.h
│ │ ├── parse_message.cc
│ │ ├── parse_message.h
│ │ ├── serialize_message.cc
│ │ ├── serialize_message.h
│ │ ├── serialized_message_assembler.cc
│ │ ├── serialized_message_assembler.h
│ │ ├── serialized_message_backward_writer.cc
│ │ ├── serialized_message_backward_writer.h
│ │ ├── serialized_message_internal.h
│ │ ├── serialized_message_reader.cc
│ │ ├── serialized_message_reader.h
│ │ ├── serialized_message_reader_internal.h
│ │ ├── serialized_message_writer.cc
│ │ ├── serialized_message_writer.h
│ │ ├── text_parse_message.cc
│ │ ├── text_parse_message.h
│ │ ├── text_print_message.cc
│ │ └── text_print_message.h
│ ├── ordered_varint/
│ │ ├── BUILD
│ │ ├── ordered_varint_internal.h
│ │ ├── ordered_varint_reading.cc
│ │ ├── ordered_varint_reading.h
│ │ ├── ordered_varint_writing.cc
│ │ └── ordered_varint_writing.h
│ ├── records/
│ │ ├── BUILD
│ │ ├── README.md
│ │ ├── block.h
│ │ ├── chunk_reader.cc
│ │ ├── chunk_reader.h
│ │ ├── chunk_writer.cc
│ │ ├── chunk_writer.h
│ │ ├── record_position.cc
│ │ ├── record_position.h
│ │ ├── record_reader.cc
│ │ ├── record_reader.h
│ │ ├── record_writer.cc
│ │ ├── record_writer.h
│ │ ├── records_metadata.proto
│ │ ├── skipped_region.cc
│ │ ├── skipped_region.h
│ │ └── tools/
│ │ ├── BUILD
│ │ ├── describe_riegeli_file.cc
│ │ ├── records_benchmark.cc
│ │ ├── riegeli_summary.proto
│ │ ├── tfrecord_recognizer.cc
│ │ └── tfrecord_recognizer.h
│ ├── snappy/
│ │ ├── BUILD
│ │ ├── framed/
│ │ │ ├── BUILD
│ │ │ ├── framed_snappy_reader.cc
│ │ │ ├── framed_snappy_reader.h
│ │ │ ├── framed_snappy_writer.cc
│ │ │ └── framed_snappy_writer.h
│ │ ├── hadoop/
│ │ │ ├── BUILD
│ │ │ ├── hadoop_snappy_reader.cc
│ │ │ ├── hadoop_snappy_reader.h
│ │ │ ├── hadoop_snappy_writer.cc
│ │ │ └── hadoop_snappy_writer.h
│ │ ├── snappy_reader.cc
│ │ ├── snappy_reader.h
│ │ ├── snappy_streams.cc
│ │ ├── snappy_streams.h
│ │ ├── snappy_writer.cc
│ │ └── snappy_writer.h
│ ├── tensorflow/
│ │ ├── BUILD
│ │ ├── io/
│ │ │ ├── BUILD
│ │ │ ├── file_reader.cc
│ │ │ ├── file_reader.h
│ │ │ ├── file_writer.cc
│ │ │ ├── file_writer.h
│ │ │ └── tstring_writer.h
│ │ ├── kernels/
│ │ │ └── riegeli_dataset_ops.cc
│ │ └── ops/
│ │ └── riegeli_dataset_ops.cc
│ ├── text/
│ │ ├── BUILD
│ │ ├── ascii_align.h
│ │ ├── concat.h
│ │ ├── join.h
│ │ ├── write_int.cc
│ │ └── write_int.h
│ ├── varint/
│ │ ├── BUILD
│ │ ├── varint_internal.h
│ │ ├── varint_reading.cc
│ │ ├── varint_reading.h
│ │ └── varint_writing.h
│ ├── xz/
│ │ ├── BUILD
│ │ ├── xz_error.cc
│ │ ├── xz_error.h
│ │ ├── xz_reader.cc
│ │ ├── xz_reader.h
│ │ ├── xz_writer.cc
│ │ └── xz_writer.h
│ ├── zlib/
│ │ ├── BUILD
│ │ ├── zlib_dictionary.h
│ │ ├── zlib_error.cc
│ │ ├── zlib_error.h
│ │ ├── zlib_reader.cc
│ │ ├── zlib_reader.h
│ │ ├── zlib_writer.cc
│ │ └── zlib_writer.h
│ └── zstd/
│ ├── BUILD
│ ├── zstd_dictionary.cc
│ ├── zstd_dictionary.h
│ ├── zstd_reader.cc
│ ├── zstd_reader.h
│ ├── zstd_writer.cc
│ └── zstd_writer.h
└── tf_dependency/
├── BUILD
├── BUILD.tpl
└── tf_configure.bzl
================================================
FILE CONTENTS
================================================
================================================
FILE: .bazelrc
================================================
# Enable Bzlmod by default.
common --enable_bzlmod
# Use C++17.
build --cxxopt=-std=c++17
build --host_cxxopt=-std=c++17
# Make Python protos faster by backing them with C++ protos.
# TODO: Reenable once protobuf releases
# https://github.com/protocolbuffers/protobuf/pull/22633
# i.e. in version > 32.0. Or possibly switch to upb.
# build --define=use_fast_cpp_protos=true
# Options from ./configure
# This is currently disabled because TensorFlow does not support bzlmod,
# hence Riegeli/TensorFlow bindings are broken anyway.
# import %workspace%/configure.bazelrc
================================================
FILE: CONTRIBUTING.md
================================================
# How to Contribute
We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.
## Contributor License Agreement
Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution,
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.
You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.
## Code reviews
All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: MANIFEST.in
================================================
include .bazelrc
include *.md
include LICENSE
include MANIFEST.in
include WORKSPACE
include configure
recursive-include doc *
recursive-include python *
recursive-include riegeli *
recursive-include third_party *
================================================
FILE: MODULE.bazel
================================================
module(
name = "riegeli",
repo_name = "com_google_riegeli",
)
# Direct dependencies, resolved from the Bazel Central Registry. The repo_name
# attributes preserve the legacy WORKSPACE-era repository names that BUILD
# files in this project reference (e.g. @com_google_absl).
bazel_dep(
name = "abseil-cpp",
version = "20260107.0",
repo_name = "com_google_absl",
)
bazel_dep(
name = "abseil-py",
version = "2.1.0",
repo_name = "absl_py",
)
bazel_dep(
name = "bazel_skylib",
version = "1.7.1",
)
bazel_dep(
name = "boringssl",
version = "0.0.0-20240530-2db0eb3",
)
bazel_dep(
name = "brotli",
version = "1.1.0",
repo_name = "org_brotli",
)
bazel_dep(
name = "bzip2",
version = "1.0.8",
)
bazel_dep(
name = "highwayhash",
version = "0.0.0-20240305-5ad3bf8.bcr.1",
)
bazel_dep(
name = "lz4",
version = "1.9.4",
)
bazel_dep(
name = "platforms",
version = "0.0.9",
)
bazel_dep(
name = "protobuf",
version = "33.2",
repo_name = "com_google_protobuf",
)
bazel_dep(
name = "rules_cc",
version = "0.1.2",
)
bazel_dep(
name = "rules_python",
version = "0.36.0",
)
bazel_dep(
name = "snappy",
version = "1.2.0",
)
bazel_dep(
name = "xz",
version = "5.4.5.bcr.1",
)
bazel_dep(
name = "zlib",
version = "1.3.1.bcr.3",
)
bazel_dep(
name = "zstd",
version = "1.5.6",
repo_name = "net_zstd",
)
bazel_dep(
name = "google_cloud_cpp",
version = "3.0.0-rc1",
)
# Configure hermetic Python toolchain
SUPPORTED_PYTHON_VERSIONS = [
"3.8",
"3.9",
"3.10",
"3.11",
"3.12",
]
# The newest supported version acts as the default toolchain.
DEFAULT_PYTHON_VERSION = SUPPORTED_PYTHON_VERSIONS[-1]
python = use_extension("@rules_python//python/extensions:python.bzl", "python")
# Register one hermetic toolchain per supported Python version; only the
# DEFAULT_PYTHON_VERSION toolchain is marked as the default.
[
python.toolchain(
is_default = version == DEFAULT_PYTHON_VERSION,
python_version = version,
)
for version in SUPPORTED_PYTHON_VERSIONS
]
================================================
FILE: README.md
================================================
# Riegeli
*Riegeli/records* is a file format for storing a sequence of string records,
typically serialized protocol buffers. It supports dense compression, fast
decoding, seeking, detection and optional skipping of data corruption, filtering
of proto message fields for even faster decoding, and parallel encoding.
See [documentation](https://github.com/google/riegeli/blob/master/doc/index.md).
# Status
Riegeli file format will only change in a backward compatible way (i.e. future
readers will understand current files, but current readers might not understand
files using future features).
Riegeli C++ API might change in incompatible ways.
================================================
FILE: configure
================================================
#!/bin/bash
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Probes for an installed TensorFlow and writes configure.bazelrc with the
# environment variables needed to build the Riegeli/TensorFlow bindings.
set -e
# Interpreter used for the probe; empty if `python` is not on PATH.
PYTHON_BIN_PATH=`which python`
if [[ $PYTHON_BIN_PATH ]] && $PYTHON_BIN_PATH -c "import tensorflow" &>/dev/null; then
# TensorFlow is importable: query its compile and link flags, which look like
# "-I/path/to/include ..." and "-L/path/to/lib -l:libtensorflow_framework.so.N".
TF_CFLAGS=$($PYTHON_BIN_PATH -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))')
TF_LFLAGS=$($PYTHON_BIN_PATH -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))')
# First word of TF_CFLAGS, with its "-I" prefix stripped: the header directory.
TF_HEADER_DIR=${TF_CFLAGS%% *}
TF_HEADER_DIR=${TF_HEADER_DIR#-I}
# First word of TF_LFLAGS, with its "-L" prefix stripped: the library directory.
TF_SHARED_LIBRARY_DIR=${TF_LFLAGS%% *}
TF_SHARED_LIBRARY_DIR=${TF_SHARED_LIBRARY_DIR#-L}
# Everything after the last " -l:" in TF_LFLAGS: the shared library file name.
TF_SHARED_LIBRARY_NAME=${TF_LFLAGS##* -l:}
else
# TensorFlow is unavailable; emit empty values so the TensorFlow-dependent
# targets are configured without it.
TF_HEADER_DIR=
TF_SHARED_LIBRARY_DIR=
TF_SHARED_LIBRARY_NAME=
fi
# Record the discovered settings as Bazel --action_env options.
{
printf 'build --action_env PYTHON_BIN_PATH="%s"\n' "$PYTHON_BIN_PATH"
printf 'build --action_env TF_HEADER_DIR="%s"\n' "$TF_HEADER_DIR"
printf 'build --action_env TF_SHARED_LIBRARY_DIR="%s"\n' "$TF_SHARED_LIBRARY_DIR"
printf 'build --action_env TF_SHARED_LIBRARY_NAME="%s"\n' "$TF_SHARED_LIBRARY_NAME"
} >configure.bazelrc
echo "Set up configure.bazelrc. Make sure to include it in your .bazelrc file."
================================================
FILE: doc/index.md
================================================
# Riegeli
*Riegeli/records* is a file format for storing a sequence of string records,
typically serialized protocol buffers. It supports dense compression, fast
decoding, seeking, detection and optional skipping of data corruption, filtering
of proto message fields for even faster decoding, and parallel encoding.
* [Specification of Riegeli/records file format](riegeli_records_file_format.md).
* [Specifying options for writing Riegeli/records files](record_writer_options.md).
================================================
FILE: doc/record_writer_options.md
================================================
# Specifying options for writing Riegeli/records files
Options for writing Riegeli/records files can be specified as a string:
```data
options ::= option? ("," option?)*
option ::=
"default" |
"transpose" (":" ("true" | "false"))? |
"uncompressed" |
"brotli" (":" brotli_level)? |
"zstd" (":" zstd_level)? |
"snappy" (":" snappy_level)? |
"window_log" ":" window_log |
"brotli_encoder" ":" ("rbrotli_or_cbrotli" | "cbrotli" | "rbrotli") |
"chunk_size" ":" chunk_size |
"bucket_fraction" ":" bucket_fraction |
"padding" (":" padding)? |
"initial_padding" (":" padding)? |
"final_padding" (":" padding)? |
"parallelism" ":" parallelism
brotli_level ::= integer in the range [0..11] (default 6)
zstd_level ::= integer in the range [-131072..22] (default 3)
snappy_level ::= integer in the range [1..2] (default 1)
window_log ::= "auto" or integer in the range [10..31]
chunk_size ::= "auto" or positive integer expressed as real with optional
suffix [BkKMGTPE]
bucket_fraction ::= real in the range [0..1]
padding ::= positive integer expressed as real with optional suffix [BkKMGTPE]
(default 64K)
parallelism ::= non-negative integer
```
An empty string is the same as `default`.
## `transpose`
If `true` (`transpose` is the same as `transpose:true`), records should be
serialized proto messages (but nothing will break if they are not). A chunk of
records will be processed in a way which allows for better compression.
If `false`, a chunk of records will be stored in a simpler format, directly or
with compression.
Default: `false`.
## Compression algorithms
### `uncompressed`
Changes compression algorithm to Uncompressed (turns compression off).
### `brotli`
Changes compression algorithm to [Brotli](https://github.com/google/brotli).
Sets compression level which tunes the tradeoff between compression density and
compression speed (higher = better density but slower).
`brotli_level` must be between 0 and 11. Default: `6`.
This is the default compression algorithm.
### `zstd`
Changes compression algorithm to [Zstd](https://facebook.github.io/zstd/). Sets
compression level which tunes the tradeoff between compression density and
compression speed (higher = better density but slower).
`zstd_level` must be between -131072 and 22. Level 0 is currently equivalent to
3. Default: 3.
### `snappy`
Changes compression algorithm to [Snappy](https://google.github.io/snappy/).
`snappy_level` must be between 1 and 2. Default: 1.
## `window_log`
Logarithm of the LZ77 sliding window size. This tunes the tradeoff between
compression density and memory usage (higher = better density but more memory).
Special value `auto` means to keep the default (`brotli`: 22, `zstd`: derived
from compression level and chunk size).
For `uncompressed` and `snappy`, `window_log` must be `auto`. For `brotli`,
`window_log` must be `auto` or between 10 and 30. For `zstd`, `window_log` must
be `auto` or between 10 and 30 in 32-bit build, 31 in 64-bit build.
Default: `auto`.
## `chunk_size`
Sets the desired uncompressed size of a chunk which groups messages to be
transposed, compressed, and written together.
A larger chunk size improves compression density; a smaller chunk size allows to
read pieces of the file independently with finer granularity, and reduces memory
usage of both writer and reader.
Special value `auto` means to keep the default (compressed: 1M, uncompressed:
4k).
Default: `auto`.
## `bucket_fraction`
Sets the desired uncompressed size of a bucket which groups values of several
fields of the given wire type to be compressed together, relative to the desired
chunk size, on the scale between 0.0 (compress each field separately) and 1.0
(put all fields of the same wire type in the same bucket).
This is meaningful if transpose and compression are enabled. A larger bucket
size improves compression density; a smaller bucket size makes reading with
projection faster, allowing to skip decompression of values of fields which are
not included.
Default: 1.0.
## `padding`
If `padding > 1`, padding is written at the beginning, when flushing, and at the
end of the file, for the absolute position to reach a multiple of `padding`.
Consequences if `padding` is a multiple of 64KB:
1. Physical concatenation of separately written files yields a valid file
(setting metadata in subsequent files is wasteful but harmless).
2. Even if the existing file was corrupted or truncated, data appended to it
will be recoverable.
The cost is that up to `padding` bytes is wasted when padding is written.
`padding` is a shortcut for `set_initial_padding` with `set_final_padding`.
`padding` without the parameter assumes 64KB.
Default: 1 (no padding).
## `initial_padding`
If `initial_padding > 1`, padding is written at the beginning of the file, for
the absolute position to reach a multiple of `initial_padding`.
See `padding` for details.
`initial_padding` without the parameter assumes 64KB.
Default: 1 (no padding).
## `final_padding`
If `final_padding > 1`, padding is written when flushing and at the end of the
file, for the absolute position to reach a multiple of `final_padding`.
See `padding` for details.
`final_padding` without the parameter assumes 64KB.
Default: 1 (no padding).
## `parallelism`
Sets the maximum number of chunks being encoded in parallel in background.
Larger parallelism can increase throughput, up to a point where it no longer
matters; smaller parallelism reduces memory usage.
If `parallelism > 0`, chunks are written in background and reporting writing
errors is delayed.
Default: 0.
================================================
FILE: doc/riegeli_records_file_format.md
================================================
# Riegeli/records file format specification
## Summary
File contents are interpreted as a sequence of variable-sized *chunks,* where a
chunk encodes some number of *records.* A record can be any byte sequence but
Riegeli has special support for the common case where it is a serialized proto
message.
In order to support seeking and recovery after data corruption, the sequence of
chunks is interrupted by a *block header* at every multiple of the block size
which is 64 KiB. After the block header the interrupted chunk continues.
A record can be identified by the position of the chunk beginning and the index
of the record within the chunk. A record can also be identified by a number
resembling a file position, defined as the sum of the chunk beginning and the
record index.
## Conventions
Numbers in block headers and chunk headers are encoded as unsigned Little-Endian
integers.
Hashes are 64-bit [HighwayHash](https://github.com/google/highwayhash) values
with the key {0x2f696c6567656952, 0x0a7364726f636572, 0x2f696c6567656952,
0x0a7364726f636572} ('Riegeli/', 'records\n', 'Riegeli/', 'records\n').
## Block header
A block header allows to locate the chunk that the block header interrupts.
Block headers can interrupt a chunk at arbitrary points, including in the middle
of the chunk header.
If a block header lies exactly between chunks, it is considered to interrupt the
next chunk; this includes the situation at the beginning of the file. In this
case the chunk formally begins at the beginning of the block, even though it
contains no bytes before the block header.
* Block header (24 bytes):
* `header_hash` (8 bytes) — hash of the rest of the header
(`previous_chunk` and `next_chunk`)
* `previous_chunk` (8 bytes) — distance from the beginning of the chunk
interrupted by this block header to the beginning of the block
* `next_chunk` (8 bytes) — distance from the beginning of the block to the
end of the chunk interrupted by this block header
If `header_hash` does not match, then this block header is corrupted and must be
ignored. Block headers can be skipped during sequential file reading, they are
useful only for seeking and for error recovery.
## Chunk
A chunk must not begin inside nor immediately after a block header.
* Chunk header (40 bytes):
* `header_hash` (8 bytes) — hash of the rest of the header (`data_size` up
to and including `decoded_data_size`)
* `data_size` (8 bytes) — size of `data` (excluding intervening block
headers)
* `data_hash` (8 bytes) — hash of `data`
* `chunk_type` (1 byte) — determines how to interpret `data`
* `num_records` (7 bytes) — number of records after decoding
* `decoded_data_size` (8 bytes) — sum of record sizes after decoding
* `data` (`data_size` bytes) — encoded records or other data
* `padding` — ignored (usually filled with zeros by the encoder)
If `header_hash` does not match, header contents cannot be trusted; if skipping
over corruption is desired, a valid chunk should be located using block headers.
If `data_hash` does not match, `data` is corrupted; if skipping over corruption
is desired, the chunk must be ignored.
The size of `padding` is the minimum size which satisfies the following
constraints:
* The chunk (including chunk header, `data`, `padding`, and intervening block
headers) has at least as many bytes as `num_records`.
* The chunk does not end inside nor immediately after a block header.
If `num_records` is 0, `decoded_data_size` has a meaning depending on the chunk
type.
*Rationale:*
*The presence of `padding` allows to assign unique numbers resembling file
positions to records.*
*`decoded_data_size` is stored in the chunk header, instead of being implied by
or stored in `data`, to help decoders decide how many chunks to potentially read
ahead.*
## Chunk data
Some parts of chunk data are compressed. The compression format is generally
specified as `compression_type` (byte):
* 0 — none
* 0x62 ('b') — [Brotli](https://github.com/google/brotli)
* 0x7a ('z') — [Zstd](https://facebook.github.io/zstd/)
* 0x73 ('s') — [Snappy](https://google.github.io/snappy/)
Any compressed block is prefixed with its decompressed size (varint64) unless
`compression_type` is 0.
*Rationale:*
*Knowing the decompressed size can make it easier for the decoder to decompress
data into a preallocated array.*
### File signature
`chunk_type` is 0x73 ('s').
A file signature chunk must be present at the beginning of the file. It may also
be present elsewhere, in which case it encodes no records and is ignored.
`data_size`, `num_records`, and `decoded_data_size` must be 0.
This makes the first 64 bytes of a Riegeli/records file fixed:
```data
83 af 70 d1 0d 88 4a 3f 00 00 00 00 00 00 00 00
40 00 00 00 00 00 00 00 91 ba c2 3c 92 87 e1 a9
00 00 00 00 00 00 00 00 e1 9f 13 c0 e9 b1 c3 72
73 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
```
### File metadata
`chunk_type` is 0x6d ('m').
A file metadata chunk provides information describing the records. Metadata are
not necessary to read the records but might be helpful to interpret their
contents.
If present, metadata should be written immediately after file signature.
The chunk is encoded like a transposed chunk with a single record containing a
serialized `RecordsMetadata` proto message, except that `chunk_type` is
different and `num_records` is 0.
### Padding chunk
`chunk_type` is 0x70 ('p').
A padding chunk encodes no records and only occupies file space.
`num_records` and `decoded_data_size` must be 0. `data` is ignored (usually
filled with zeros by the encoder).
This can be used for more efficient file concatenation (bringing the file offset
modulo `kBlockSize` to 0 allows for physical concatenation of files without
examining their contents), or for syncing to a file system which requires a
particular file offset granularity in order for the sync to be effective.
### Simple chunk with records
`chunk_type` is 0x72 ('r').
Simple chunks store record sizes and concatenated record contents in two
buffers, possibly compressed.
The format:
* `compression_type` (byte) — compression type for sizes and values
* `compressed_sizes_size` (varint64) — size of `compressed_sizes`
* `compressed_sizes` (`compressed_sizes_size` bytes) — compressed buffer with
record sizes
* `compressed_values` (the rest of `data`) — compressed buffer with record
values
`compressed_sizes`, after decompression, contains `num_records` varint64s: the
size of each record.
`compressed_values`, after decompression, contains `decoded_data_size` bytes:
concatenation of record values.
### Transposed chunk with records
`chunk_type` is 0x74 ('t').
TODO: Document this.
## Properties of the file format
* Data corruption anywhere is detected whenever the hash allows this, and it
causes only a local data loss of up to a chunk (if chunk data are damaged)
or block (if chunk header is damaged).
* It is possible to open for append and write more records, even without
reading the original file contents; the original file size must be taken
into account though.
* Seeking to the chunk closest to the given file position requires a seek +
small read, then iterating through chunk headers in a block.
## Implementation notes
The following formulas clarify how certain field values and positions can be
computed.
Constants for fixed sizes:
```c++
kBlockSize = 1 << 16;
kBlockHeaderSize = 24;
kUsableBlockSize = kBlockSize - kBlockHeaderSize;
kChunkHeaderSize = 40;
```
Constraints for chunk boundary distances in a block header:
```c++
previous_chunk % kBlockSize < kUsableBlockSize &&
next_chunk > 0 &&
(next_chunk - 1) % kBlockSize >= kBlockHeaderSize
```
End position of a chunk which begins at `chunk_begin`:
```c++
NumOverheadBlocks(pos, size) =
(size + (pos + kUsableBlockSize - 1) % kBlockSize) / kUsableBlockSize;
AddWithOverhead(pos, size) =
pos + size + NumOverheadBlocks(pos, size) * kBlockHeaderSize;
// Equivalent implementation using unsigned arithmetic modulo 1 << 64:
// RemainingInBlock(pos) = (-pos) % kBlockSize;
RemainingInBlock(pos) = kBlockSize - 1 - (pos + kBlockSize - 1) % kBlockSize;
SaturatingSub(a, b) = a > b ? a - b : 0;
// 0 -> 0, 1..25 -> 25, 26 -> 26, ..., 64K -> 64K, 64K+1..64K+25 -> 64K+25 etc.
RoundUpToPossibleChunkBoundary(pos) =
pos + SaturatingSub(RemainingInBlock(pos), kUsableBlockSize - 1);
chunk_end = max(AddWithOverhead(chunk_begin, kChunkHeaderSize + data_size),
RoundUpToPossibleChunkBoundary(chunk_begin + num_records));
```
Fields of a block header at `block_begin` which interrupts a chunk at
`chunk_begin`:
```c++
prev_chunk = block_begin - chunk_begin;
next_chunk = chunk_end - block_begin;
```
================================================
FILE: python/BUILD
================================================
load("@rules_python//python:defs.bzl", "py_binary")
package(default_visibility = ["//visibility:private"])
licenses(["notice"])
# These dependencies are gathered in a py_binary, instead of directly in
# sh_binary data, so that bazel links __init__.py files to runfiles.
py_binary(
name = "dummy_binary",
srcs = ["dummy_binary.py"],
srcs_version = "PY3",
deps = [
"//python/riegeli",
"//python/riegeli/tensorflow:riegeli_dataset_ops",
],
)
# Entry point for building the riegeli pip package. The data files are made
# available in the script's runfiles, where build_pip_package.sh runs setup.py.
sh_binary(
    name = "build_pip_package",
    srcs = ["build_pip_package.sh"],
    data = [
        "MANIFEST.in",
        "README.md",
        "setup.py",
        ":dummy_binary",
    ],
)
================================================
FILE: python/MANIFEST.in
================================================
recursive-include riegeli *.py
================================================
FILE: python/README.md
================================================
# Riegeli
*Riegeli/records* is a file format for storing a sequence of string records,
typically serialized protocol buffers. It supports dense compression, fast
decoding, seeking, detection and optional skipping of data corruption, filtering
of proto message fields for even faster decoding, and parallel encoding.
See [documentation](https://github.com/google/riegeli/blob/master/doc/index.md).
# Status
Riegeli file format will only change in a backward compatible way (i.e. future
readers will understand current files, but current readers might not understand
files using future features).
Riegeli C++ API might change in incompatible ways.
================================================
FILE: python/__init__.py
================================================
================================================
FILE: python/build_pip_package.sh
================================================
#!/bin/bash
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds a pip package for riegeli.
#
# Usage (where DEST is a where to write the output, e.g. ~/riegeli-dist):
# $ bazel build -c opt python:build_pip_package
# $ bazel-bin/python/build_pip_package --dest DEST --sdist --bdist
set -e
# Succeeds (exit status 0) if $1 is an absolute path: either POSIX style
# ("/...") or Windows drive style ("C:/..." or "C:\...").
function is_absolute {
  case "$1" in
    /*) return 0 ;;
  esac
  [[ "$1" =~ ^[a-zA-Z]:[/\\].* ]]
}
# Prints an absolute form of path $1 (no trailing newline): absolute paths
# are printed unchanged; relative paths are resolved against $PWD, with a
# leading "./" stripped first. The path is not canonicalized on disk.
function real_path() {
  local path=$1
  if is_absolute "$path"; then
    printf "%s" "$path"
  else
    printf "%s/%s" "$PWD" "${path#./}"
  fi
}
# Builds a source distribution into directory $1 by running the source tree's
# setup.py from the repository root.
function build_sdist() {
  local dest=$1
  python python/setup.py sdist --dist-dir "$dest"
}
# Builds a binary wheel into directory $1. Runs setup.py inside the bazel
# runfiles tree — presumably where the built extension modules live; confirm
# against setup.py. `cd -` returns to the previous directory afterwards
# (and echoes it, as usual for `cd -`).
function build_bdist() {
  local dest=$1
  cd bazel-bin/python/build_pip_package.runfiles/com_google_riegeli/python
  python setup.py bdist_wheel --dist-dir "$dest"
  cd -
}
# Parses command-line flags (--dest DIRECTORY, --sdist, --bdist), validates
# that a destination and at least one distribution kind were requested, then
# builds the requested distribution(s) into the destination directory.
function main() {
  local dest=
  local sdist=false
  local bdist=false
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dest)
        shift
        dest=$(real_path "$1")
        ;;
      --sdist)
        sdist=true
        ;;
      --bdist)
        bdist=true
        ;;
      *)
        printf "Unknown flag: %s\n" "$1" >&2
        exit 1
        ;;
    esac
    shift
  done
  if [[ -z $dest ]]; then
    printf "Missing required flag: --dest DIRECTORY\n" >&2
    exit 1
  fi
  if [[ $sdist != true ]] && [[ $bdist != true ]]; then
    printf "Nothing to do: missing --sdist or --bdist\n" >&2
    exit 1
  fi
  mkdir -p -- "$dest"
  if [[ $sdist = true ]]; then
    build_sdist "$dest"
  fi
  if [[ $bdist = true ]]; then
    build_bdist "$dest"
  fi
}
main "$@"
================================================
FILE: python/dummy_binary.py
================================================
================================================
FILE: python/riegeli/BUILD
================================================
# Riegeli, file format for storing a sequence of records.
load("@rules_python//python:defs.bzl", "py_library")
package(
default_visibility = ["//visibility:public"],
features = ["header_modules"],
)
licenses(["notice"])
exports_files(["LICENSE"])
py_library(
name = "riegeli",
srcs = ["__init__.py"],
imports = [".."],
deps = [
"//python/riegeli/base:riegeli_error",
"//python/riegeli/records:record_position",
"//python/riegeli/records:record_reader",
"//python/riegeli/records:record_writer",
"//python/riegeli/records:records_metadata_py_pb2",
"//python/riegeli/records:skipped_region",
],
)
================================================
FILE: python/riegeli/BUILD.tpl
================================================
load("@bazel_tools//tools/python:toolchain.bzl", "py_runtime_pair")
licenses(["restricted"])
package(
default_visibility = ["//visibility:public"],
features = ["header_modules"],
)
toolchain(
name = "toolchain",
toolchain = ":py_runtime_pair",
toolchain_type = "@bazel_tools//tools/python:toolchain_type",
)
# To build Python C/C++ extension on Windows, we need to link to python import library pythonXY.lib
# See https://docs.python.org/3/extending/windows.html
cc_import(
name = "python_lib",
interface_library = select({
":windows": ":python_import_lib",
# A placeholder for Unix platforms which makes --no_build happy.
"//conditions:default": "not-existing.lib",
}),
system_provided = 1,
)
cc_library(
name = "python_headers",
hdrs = [":python_include"],
deps = select({
":windows": [":python_lib"],
"//conditions:default": [],
}),
includes = ["python_include"],
)
cc_library(
name = "numpy_headers",
hdrs = [":numpy_include"],
includes = ["numpy_include"],
)
config_setting(
name = "windows",
values = {"cpu": "x64_windows"},
visibility = ["//visibility:public"],
)
%{PYTHON_RUNTIME_PAIR}
%{PYTHON_INCLUDE_GENRULE}
%{NUMPY_INCLUDE_GENRULE}
%{PYTHON_IMPORT_LIB_GENRULE}
================================================
FILE: python/riegeli/__init__.py
================================================
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Writes or reads Riegeli/records files."""
from riegeli.base import riegeli_error
from riegeli.records import record_position
from riegeli.records import record_reader
from riegeli.records import record_writer
from riegeli.records import records_metadata_pb2
from riegeli.records import skipped_region
__all__ = (
'RiegeliError',
'CancelledError',
'UnknownError',
'InvalidArgumentError',
'DeadlineExceededError',
'NotFoundError',
'AlreadyExistsError',
'PermissionDeniedError',
'UnauthenticatedError',
'ResourceExhaustedError',
'FailedPreconditionError',
'AbortedError',
'OutOfRangeError',
'UnimplementedError',
'InternalError',
'UnavailableError',
'DataLossError',
'FlushType',
'RecordPosition',
'SkippedRegion',
'RecordsMetadata',
'set_record_type',
'RecordWriter',
'EXISTENCE_ONLY',
'get_record_type',
'RecordReader',
)
# pylint: disable=invalid-name
RiegeliError = riegeli_error.RiegeliError
CancelledError = riegeli_error.CancelledError
UnknownError = riegeli_error.UnknownError
InvalidArgumentError = riegeli_error.InvalidArgumentError
DeadlineExceededError = riegeli_error.DeadlineExceededError
NotFoundError = riegeli_error.NotFoundError
AlreadyExistsError = riegeli_error.AlreadyExistsError
PermissionDeniedError = riegeli_error.PermissionDeniedError
UnauthenticatedError = riegeli_error.UnauthenticatedError
ResourceExhaustedError = riegeli_error.ResourceExhaustedError
FailedPreconditionError = riegeli_error.FailedPreconditionError
AbortedError = riegeli_error.AbortedError
OutOfRangeError = riegeli_error.OutOfRangeError
UnimplementedError = riegeli_error.UnimplementedError
InternalError = riegeli_error.InternalError
UnavailableError = riegeli_error.UnavailableError
DataLossError = riegeli_error.DataLossError
RecordPosition = record_position.RecordPosition
SkippedRegion = skipped_region.SkippedRegion
RecordsMetadata = records_metadata_pb2.RecordsMetadata
FlushType = record_writer.FlushType
set_record_type = record_writer.set_record_type
RecordWriter = record_writer.RecordWriter
EXISTENCE_ONLY = record_reader.EXISTENCE_ONLY
get_record_type = record_reader.get_record_type
RecordReader = record_reader.RecordReader
================================================
FILE: python/riegeli/base/BUILD
================================================
load("@rules_cc//cc:defs.bzl", "cc_library")
load("@rules_python//python:defs.bzl", "py_library")
package(
default_visibility = ["//python/riegeli:__subpackages__"],
features = ["header_modules"],
)
licenses(["notice"])
cc_library(
name = "utils",
srcs = ["utils.cc"],
hdrs = ["utils.h"],
data = [":riegeli_error"], # Python module imported from C++.
# utils.cc has #define before #include to influence what the included
# files provide.
features = ["-use_header_modules"],
deps = [
"//riegeli/base:arithmetic",
"//riegeli/base:assert",
"//riegeli/base:chain",
"//riegeli/base:compare",
"//riegeli/base:types",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:string_view",
"@com_google_absl//absl/types:span",
"@rules_python//python/cc:current_py_cc_headers",
],
)
py_library(
name = "riegeli_error",
srcs = ["riegeli_error.py"],
)
================================================
FILE: python/riegeli/base/__init__.py
================================================
================================================
FILE: python/riegeli/base/riegeli_error.py
================================================
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = (
'RiegeliError',
'CancelledError',
'UnknownError',
'InvalidArgumentError',
'DeadlineExceededError',
'NotFoundError',
'AlreadyExistsError',
'PermissionDeniedError',
'UnauthenticatedError',
'ResourceExhaustedError',
'FailedPreconditionError',
'AbortedError',
'OutOfRangeError',
'UnimplementedError',
'InternalError',
'UnavailableError',
'DataLossError',
)
class RiegeliError(Exception):
  """Base class of errors reported by Riegeli.

  Sometimes multiple error codes may apply. Services should return the most
  specific error code that applies. For example, prefer `OutOfRangeError` over
  `FailedPreconditionError` if both codes apply. Similarly prefer
  `NotFoundError` or `AlreadyExistsError` over `FailedPreconditionError`.

  Attributes:
    code: Error code classifying the error, matching C++ StatusCode.
  """
class CancelledError(RiegeliError):
"""The operation was cancelled, typically by the caller."""
code = 1
class UnknownError(RiegeliError):
"""Unknown error.
For example, this error may be returned when a Status value received from
another address space belongs to an error-space that is not known in this
address space. Also errors raised by APIs that do not return enough error
information may be converted to this error.
"""
code = 2
class InvalidArgumentError(RiegeliError):
"""The client specified an invalid argument.
Note that this differs from `FailedPreconditionError`. `InvalidArgumentError`
indicates arguments that are problematic regardless of the state of the system
(e.g., a malformed file name).
"""
code = 3
class DeadlineExceededError(RiegeliError):
  """The deadline expired before the operation could complete.

  For operations that change the state of the system, this error may be
  returned even if the operation has completed successfully. For example, a
  successful response from a server could have been delayed long enough for
  the deadline to expire.
  """

  code = 4
class NotFoundError(RiegeliError):
"""Some requested entity (e.g., file or directory) was not found.
Note to server developers: if a request is denied for an entire class of
users, such as gradual feature rollout or undocumented allowlist,
`NotFoundError` may be used. If a request is denied for some users within a
class of users, such as user-based access control, `PermissionDeniedError`
must be used.
"""
code = 5
class AlreadyExistsError(RiegeliError):
"""The entity that a client attempted to create already exists."""
code = 6
class PermissionDeniedError(RiegeliError):
"""The caller does not have permission to execute the specified operation.
`PermissionDeniedError` must not be used for rejections caused by exhausting
some resource (use `ResourceExhaustedError` instead for those errors).
`PermissionDeniedError` must not be used if the caller can not be identified
(use `UnauthenticatedError` instead for those errors). This error code does
not imply the request is valid or the requested entity exists or satisfies
other pre-conditions.
"""
code = 7
class UnauthenticatedError(RiegeliError):
  """No valid authentication credentials for the operation."""

  # 16, not the next value in sequence: matches the numbering of the
  # corresponding C++ StatusCode (kUnauthenticated = 16).
  code = 16
class ResourceExhaustedError(RiegeliError):
"""Some resource has been exhausted.
Perhaps a per-user quota, or perhaps the entire file system is out of
space.
"""
code = 8
class FailedPreconditionError(RiegeliError):
"""Failed precondition.
The operation was rejected because the system is not in a state required for
the operation's execution. For example, the directory to be deleted is
non-empty, an rmdir operation is applied to a non-directory, etc.
A litmus test that may help a service implementor in deciding between
`FailedPreconditionError`, `AbortedError`, and `UnavailableError`:
(a) Use `UnavailableError` if the client can retry just the failing call.
(b) Use `AbortedError` if the client should retry at a higher-level (e.g.,
when a client-specified test-and-set fails, indicating the client should
restart a read-modify-write sequence).
(c) Use `FailedPreconditionError` if the client should not retry until the
system state has been explicitly fixed. E.g., if an "rmdir" fails because
the directory is non-empty, `FailedPreconditionError` should be returned
since the client should not retry unless the files are deleted from the
directory.
"""
code = 9
class AbortedError(RiegeliError):
"""The operation was aborted.
Typically due to a concurrency issue such as a sequencer check failure or
transaction abort.
See litmus test at `FailedPreconditionError` for deciding between
`FailedPreconditionError`, `AbortedError`, and `UnavailableError`.
"""
code = 10
class OutOfRangeError(RiegeliError):
"""The operation was attempted past the valid range.
E.g., seeking or reading past end-of-file.
Unlike `InvalidArgumentError`, this error indicates a problem that may be
fixed if the system state changes. For example, a 32-bit file system will
generate `InvalidArgumentError` if asked to read at an offset that is not in
the range [0,2^32-1], but it will generate `OutOfRangeError` if asked to read
from an offset past the current file size.
There is a fair bit of overlap between `FailedPreconditionError` and
`OutOfRangeError`. We recommend using `OutOfRangeError` (the more specific
error) when it applies so that callers who are iterating through a space can
easily look for an `OutOfRangeError` error to detect when they are done.
"""
code = 11
class UnimplementedError(RiegeliError):
"""The operation is not implemented.
Or is not supported/enabled in this service.
"""
code = 12
class InternalError(RiegeliError):
"""Internal errors.
This means that some invariants expected by the underlying system have been
broken. This error code is reserved for serious errors.
"""
code = 13
class UnavailableError(RiegeliError):
"""The service is currently unavailable.
This is most likely a transient condition, which can be corrected by retrying
with a backoff.
See litmus test at `FailedPreconditionError` for deciding between
`FailedPreconditionError`, `AbortedError`, and `UnavailableError`.
"""
code = 14
class DataLossError(RiegeliError):
"""Unrecoverable data loss or corruption."""
code = 15
================================================
FILE: python/riegeli/base/utils.cc
================================================
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// From https://docs.python.org/3/c-api/intro.html:
// Since Python may define some pre-processor definitions which affect the
// standard headers on some systems, you must include Python.h before any
// standard headers are included.
#define PY_SSIZE_T_CLEAN
#include
// clang-format: do not reorder the above include.
#include "python/riegeli/base/utils.h"
// clang-format: do not reorder the above include.
#include
#include
#include
#include
#include
#include
#include "absl/base/attributes.h"
#include "absl/base/optimization.h"
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "riegeli/base/arithmetic.h"
#include "riegeli/base/assert.h"
#include "riegeli/base/chain.h"
#include "riegeli/base/compare.h"
#include "riegeli/base/types.h"
namespace riegeli::python {
// Copy assignment. Acquires the GIL (`PythonLock`) because Python reference
// counting is not thread-safe.
//
// Each reference is incremented before the corresponding member is reset, so
// that self-assignment is safe: the new reference is owned before the old one
// (possibly the same object) is released.
Exception& Exception::operator=(const Exception& that) noexcept {
  PythonLock lock;
  Py_XINCREF(that.type_.get());
  type_.reset(that.type_.get());
  Py_XINCREF(that.value_.get());
  value_.reset(that.value_.get());
  Py_XINCREF(that.traceback_.get());
  traceback_.reset(that.traceback_.get());
  return *this;
}
// Captures the currently raised Python exception into an `Exception`,
// clearing the Python error indicator. The GIL must be held by the caller.
Exception Exception::Fetch() {
  PythonLock::AssertHeld();
  PyObject* type;
  PyObject* value;
  PyObject* traceback;
  // `PyErr_Fetch()` transfers ownership of the three references to us.
  PyErr_Fetch(&type, &value, &traceback);
  // Ensure `value` is an instance of `type`; fetched exceptions may be in
  // unnormalized form.
  PyErr_NormalizeException(&type, &value, &traceback);
  return Exception(type, value, traceback);
}
// Re-raises the stored exception, keeping this `Exception` intact.
// Returns `nullptr` so callers can write `return exception.Restore();` from
// functions implementing the Python calling convention.
PyObject* Exception::Restore() const& {
  PythonLock::AssertHeld();
  // `PyErr_Restore()` steals references, so take additional ones to keep
  // ours alive.
  Py_XINCREF(type_.get());
  Py_XINCREF(value_.get());
  Py_XINCREF(traceback_.get());
  PyErr_Restore(type_.get(), value_.get(), traceback_.get());
  return nullptr;
}
// Re-raises the stored exception, transferring ownership of the stored
// references to the Python error indicator (this rvalue `Exception` is left
// empty). Returns `nullptr` for the Python calling convention.
PyObject* Exception::Restore() && {
  PythonLock::AssertHeld();
  PyErr_Restore(type_.release(), value_.release(), traceback_.release());
  return nullptr;
}
// Formats the stored exception for error messages as
// "ExceptionClassName: str(value)", or "OK" if no exception is stored.
// If `str(value)` fails or cannot be converted, falls back to the class name
// followed by ": ", clearing the secondary Python error.
std::string Exception::message() const {
  if (ok()) return "OK";
  PythonLock lock;
  RIEGELI_ASSERT(PyExceptionClass_Check(type_.get()))
      << "Expected an exception class, not " << Py_TYPE(type_.get())->tp_name;
  std::string message = PyExceptionClass_Name(type_.get());
  if (value_ == nullptr) return message;
  // `str(value)`.
  const PythonPtr str_result(PyObject_Str(value_.get()));
  if (ABSL_PREDICT_FALSE(str_result == nullptr)) {
    PyErr_Clear();
    absl::StrAppend(&message, ": ");
    return message;
  }
  StrOrBytes str;
  if (ABSL_PREDICT_FALSE(!str.FromPython(str_result.get()))) {
    PyErr_Clear();
    absl::StrAppend(&message, ": ");
    return message;
  }
  if (!absl::string_view(str).empty()) {
    absl::StrAppend(&message, ": ", absl::string_view(str));
  }
  return message;
}
// Raises the Python exception from `riegeli.base.riegeli_error` corresponding
// to `status.code()` (e.g. kInvalidArgument -> InvalidArgumentError), with
// `status.message()` as the exception message.
//
// Precondition: `!status.ok()`. The GIL must be held by the caller. If
// converting the message or importing the exception class fails, the Python
// error raised by that failure is left set instead.
void SetRiegeliError(const absl::Status& status) {
  RIEGELI_ASSERT(!status.ok())
      << "Failed precondition of SetRiegeliError(): status not failed";
  PythonLock::AssertHeld();
  PythonPtr message = StringToPython(status.message());
  if (ABSL_PREDICT_FALSE(message == nullptr)) return;
  PyObject* type;
  switch (status.code()) {
#define HANDLE_CODE(name)                                       \
  case absl::StatusCode::k##name: {                             \
    static constexpr ImportedConstant k##name##Error(           \
        "riegeli.base.riegeli_error", #name "Error");           \
    if (ABSL_PREDICT_FALSE(!k##name##Error.Verify())) return;   \
    type = k##name##Error.get();                                \
  } break
    // clang-format off
    HANDLE_CODE(Cancelled);
    // `default:` is deliberately placed here, attached to the Unknown case:
    // unrecognized status codes are reported as UnknownError.
    default:
    HANDLE_CODE(Unknown);
    HANDLE_CODE(InvalidArgument);
    HANDLE_CODE(DeadlineExceeded);
    HANDLE_CODE(NotFound);
    HANDLE_CODE(AlreadyExists);
    HANDLE_CODE(PermissionDenied);
    HANDLE_CODE(Unauthenticated);
    HANDLE_CODE(ResourceExhausted);
    HANDLE_CODE(FailedPrecondition);
    HANDLE_CODE(Aborted);
    HANDLE_CODE(OutOfRange);
    HANDLE_CODE(Unimplemented);
    HANDLE_CODE(Internal);
    HANDLE_CODE(Unavailable);
    HANDLE_CODE(DataLoss);
    // clang-format on
#undef HANDLE_CODE
  }
  // `PyErr_Restore()` steals references; `type` is a borrowed static, so take
  // a new reference for it.
  Py_INCREF(type);
  PyErr_Restore(type, message.release(), nullptr);
}
namespace py_internal {
namespace {
// A linked list of all objects of type `StaticObject` which have `value_`
// allocated, chained by their `next_` fields. This is used to free the objects
// on Python interpreter shutdown.
const StaticObject* all_static_objects = nullptr;
} // namespace
void FreeStaticObjectsImpl() {
const StaticObject* static_object =
std::exchange(all_static_objects, nullptr);
while (static_object != nullptr) {
static_object->value_ = nullptr;
static_object = std::exchange(static_object->next_, nullptr);
}
}
// `extern "C"` sets the C calling convention for compatibility with the Python
// API. `static` avoids making symbols public, as `extern "C"` trumps anonymous
// namespace.
extern "C" {
static void FreeStaticObjects() { FreeStaticObjectsImpl(); }
} // extern "C"
void StaticObject::RegisterThis() const {
PythonLock::AssertHeld();
if (all_static_objects == nullptr) {
// This is the first registered `StaticObject` since `Py_Initialize()`.
Py_AtExit(FreeStaticObjects);
}
next_ = std::exchange(all_static_objects, this);
}
bool ImportedCapsuleBase::ImportValue() const {
// For some reason `PyImport_ImportModule()` is sometimes required before
// `PyCapsule_Import()` for a module with a nested name.
const size_t dot = absl::string_view(capsule_name_).rfind('.');
RIEGELI_ASSERT_NE(dot, absl::string_view::npos)
<< "Capsule name does not contain a dot: " << capsule_name_;
RIEGELI_CHECK(
PyImport_ImportModule(std::string(capsule_name_, dot).c_str()) != nullptr)
<< Exception::Fetch().message();
value_ = PyCapsule_Import(capsule_name_, false);
return value_ != nullptr;
}
} // namespace py_internal
// Creates the interned `str` object for `name_`. Returns `false` on failure
// (with Python exception set).
bool Identifier::AllocateValue() const {
  value_ = StringToPython(name_).release();
  if (ABSL_PREDICT_FALSE(value_ == nullptr)) return false;
  // Interning may replace `value_` with the canonical copy of the string.
  PyUnicode_InternInPlace(&value_);
  // Arrange for `value_` to be dropped on interpreter shutdown.
  RegisterThis();
  return true;
}
// Imports `module_name_` and fetches its attribute `attr_name_` into `value_`.
// Returns `false` on failure (with Python exception set).
bool ImportedConstant::AllocateValue() const {
  const PythonPtr module_name = StringToPython(module_name_);
  if (ABSL_PREDICT_FALSE(module_name == nullptr)) return false;
  const PythonPtr module(PyImport_Import(module_name.get()));
  if (ABSL_PREDICT_FALSE(module == nullptr)) return false;
  const PythonPtr attr_name = StringToPython(attr_name_);
  if (ABSL_PREDICT_FALSE(attr_name == nullptr)) return false;
  // `value_` keeps the new reference returned by `PyObject_GetAttr()`.
  value_ = PyObject_GetAttr(module.get(), attr_name.get());
  if (ABSL_PREDICT_FALSE(value_ == nullptr)) return false;
  // Arrange for `value_` to be dropped on interpreter shutdown.
  RegisterThis();
  return true;
}
// Adds a capsule holding `ptr` to `module` under the attribute name encoded in
// `capsule_name` ("module_name.attr_name"). Returns `false` on failure (with
// Python exception set).
bool ExportCapsule(PyObject* module, const char* capsule_name,
                   const void* ptr) {
  // `PyCapsule_New()` takes a non-const pointer; the capsule only stores it.
  PythonPtr capsule(
      PyCapsule_New(const_cast<void*>(ptr), capsule_name, nullptr));
  if (ABSL_PREDICT_FALSE(capsule == nullptr)) return false;
  const size_t dot = absl::string_view(capsule_name).rfind('.');
  RIEGELI_ASSERT_NE(dot, absl::string_view::npos)
      << "Capsule name does not contain a dot: " << capsule_name;
  RIEGELI_ASSERT(PyModule_Check(module))
      << "Expected a module, not " << Py_TYPE(module)->tp_name;
  RIEGELI_ASSERT_EQ(absl::string_view(PyModule_GetName(module)),
                    absl::string_view(capsule_name, dot))
      << "Module name mismatch";
  // `PyModule_AddObject()` steals the capsule reference on success.
  if (ABSL_PREDICT_FALSE(PyModule_AddObject(module, capsule_name + dot + 1,
                                            capsule.release()) < 0)) {
    return false;
  }
  return true;
}
// Releases the `memoryview` if some other reference to it still exists, so
// that Python code cannot later touch the C++ memory it pointed at. The active
// Python exception, if any, is preserved across the `release()` call.
//
// Note: the variables are named to match the `PyErr_Fetch()` parameter order
// (type, value, traceback); the previous names were swapped and misleading.
MemoryView::~MemoryView() {
  if (object_ != nullptr && Py_REFCNT(object_.get()) > 1) {
    PyObject* type;
    PyObject* value;
    PyObject* traceback;
    PyErr_Fetch(&type, &value, &traceback);
    ReleaseInternal();
    PyErr_Restore(type, value, traceback);
  }
}
// Creates a read-only `memoryview` sharing `value`'s memory. Returns a
// borrowed pointer, or `nullptr` on failure (with Python exception set).
PyObject* MemoryView::ToPython(absl::string_view value) {
  RIEGELI_ASSERT_EQ(object_, nullptr)
      << "Failed precondition of MemoryView::ToPython(): "
         "called more than once";
  // `PyBUF_READ` makes the view read-only; the const_cast is safe because
  // Python code cannot write through it.
  object_.reset(PyMemoryView_FromMemory(const_cast<char*>(value.data()),
                                        IntCast<Py_ssize_t>(value.size()),
                                        PyBUF_READ));
  return object_.get();
}
// Creates a writable `memoryview` sharing `value`'s memory. Returns a
// borrowed pointer, or `nullptr` on failure (with Python exception set).
PyObject* MemoryView::MutableToPython(absl::Span<char> value) {
  RIEGELI_ASSERT_EQ(object_, nullptr)
      << "Failed precondition of MemoryView::MutableToPython(): "
         "called more than once";
  object_.reset(PyMemoryView_FromMemory(
      value.data(), IntCast<Py_ssize_t>(value.size()), PyBUF_WRITE));
  return object_.get();
}
// Invalidates the `memoryview` via `memoryview.release()` if any other
// reference to it exists, then drops our own reference. Returns `false` on
// failure (with Python exception set).
bool MemoryView::Release() {
  // `release()` is only needed when someone else still holds the memoryview.
  const bool release_ok = object_ == nullptr ||
                          Py_REFCNT(object_.get()) <= 1 || ReleaseInternal();
  object_.reset();
  return release_ok;
}
// Calls the Python method `release()` on the stored memoryview. Returns
// `false` on failure (with Python exception set).
inline bool MemoryView::ReleaseInternal() {
  static constexpr Identifier id_release("release");
  const PythonPtr release_result(
      PyObject_CallMethodObjArgs(object_.get(), id_release.get(), nullptr));
  return release_result != nullptr;
}
// Points `data_` at the text contents of `object`, which must be `str`
// (converted to UTF-8) or `bytes`. The referenced memory is owned by `object`.
// Returns `false` on failure (with Python exception set).
bool StrOrBytes::FromPython(PyObject* object ABSL_ATTRIBUTE_LIFETIME_BOUND) {
  RIEGELI_ASSERT_EQ(data_.data(), nullptr)
      << "Failed precondition of StrOrBytes::FromPython(): "
         "called more than once";
  if (PyUnicode_Check(object)) {
    // `str`: point at the UTF-8 representation cached inside `object`.
    Py_ssize_t length;
    const char* data = PyUnicode_AsUTF8AndSize(object, &length);
    if (ABSL_PREDICT_FALSE(data == nullptr)) return false;
    data_ = absl::string_view(data, IntCast<size_t>(length));
    return true;
  } else if (ABSL_PREDICT_FALSE(!PyBytes_Check(object))) {
    PyErr_Format(PyExc_TypeError, "Expected str or bytes, not %s",
                 Py_TYPE(object)->tp_name);
    return false;
  }
  // `bytes`: point directly at its internal buffer.
  data_ = absl::string_view(PyBytes_AS_STRING(object),
                            IntCast<size_t>(PyBytes_GET_SIZE(object)));
  return true;
}
// Copies a `Chain` into a new Python `bytes` object. Returns `nullptr` on
// failure (with Python exception set).
PythonPtr ChainToPython(const Chain& value) {
  // Allocate uninitialized `bytes` of the right size, then copy into it.
  PythonPtr bytes(
      PyBytes_FromStringAndSize(nullptr, IntCast<Py_ssize_t>(value.size())));
  if (ABSL_PREDICT_FALSE(bytes == nullptr)) return nullptr;
  value.CopyTo(PyBytes_AS_STRING(bytes.get()));
  return bytes;
}
std::optional ChainFromPython(PyObject* object) {
Py_buffer buffer;
if (ABSL_PREDICT_FALSE(PyObject_GetBuffer(object, &buffer, PyBUF_CONTIG_RO) <
0)) {
return std::nullopt;
}
Chain result(absl::string_view(static_cast(buffer.buf),
IntCast(buffer.len)));
PyBuffer_Release(&buffer);
return result;
}
// Converts C++ `size_t` to a Python `int` object. Returns `nullptr` on
// failure (with Python exception set).
PythonPtr SizeToPython(size_t value) {
  // Guard against platforms where `size_t` is wider than `unsigned long long`.
  if (ABSL_PREDICT_FALSE(value >
                         std::numeric_limits<unsigned long long>::max())) {
    PyErr_Format(PyExc_OverflowError, "Size out of range: %zu", value);
    return nullptr;
  }
  return PythonPtr(
      PyLong_FromUnsignedLongLong(IntCast<unsigned long long>(value)));
}
// Converts a Python object to C++ `size_t`, accepting `int` or anything
// supporting `__index__()`. Returns `std::nullopt` on failure (with Python
// exception set).
std::optional<size_t> SizeFromPython(PyObject* object) {
  const PythonPtr index(PyNumber_Index(object));
  if (ABSL_PREDICT_FALSE(index == nullptr)) return std::nullopt;
  RIEGELI_ASSERT(PyLong_Check(index.get()))
      << "PyNumber_Index() returned an unexpected type: "
      << Py_TYPE(index.get())->tp_name;
  // -1 is the error sentinel of `PyLong_AsUnsignedLongLong()`; distinguish a
  // genuine value of -1 from an error via `PyErr_Occurred()`.
  const unsigned long long index_value =
      PyLong_AsUnsignedLongLong(index.get());
  if (ABSL_PREDICT_FALSE(index_value ==
                         static_cast<unsigned long long>(-1)) &&
      PyErr_Occurred()) {
    return std::nullopt;
  }
  if (ABSL_PREDICT_FALSE(index_value > std::numeric_limits<size_t>::max())) {
    PyErr_Format(PyExc_OverflowError, "Size out of range: %llu", index_value);
    return std::nullopt;
  }
  return IntCast<size_t>(index_value);
}
// Converts C++ `Position` to a Python `int` object. Returns `nullptr` on
// failure (with Python exception set).
PythonPtr PositionToPython(Position value) {
  if (ABSL_PREDICT_FALSE(value >
                         std::numeric_limits<unsigned long long>::max())) {
    PyErr_Format(PyExc_OverflowError, "Position out of range: %ju",
                 uintmax_t{value});
    return nullptr;
  }
  return PythonPtr(
      PyLong_FromUnsignedLongLong(IntCast<unsigned long long>(value)));
}
std::optional PositionFromPython(PyObject* object) {
const PythonPtr index(PyNumber_Index(object));
if (ABSL_PREDICT_FALSE(index == nullptr)) return std::nullopt;
RIEGELI_ASSERT(PyLong_Check(index.get()))
<< "PyNumber_Index() returned an unexpected type: "
<< Py_TYPE(index.get())->tp_name;
const unsigned long long index_value = PyLong_AsUnsignedLongLong(index.get());
if (ABSL_PREDICT_FALSE(index_value == static_cast(-1)) &&
PyErr_Occurred()) {
return std::nullopt;
}
if (ABSL_PREDICT_FALSE(index_value > std::numeric_limits::max())) {
PyErr_Format(PyExc_OverflowError, "Position out of range: %llu",
index_value);
return std::nullopt;
}
return IntCast(index_value);
}
// Converts `PartialOrdering` to Python: `None` for `unordered`, otherwise an
// `int` of -1, 0, or 1. Returns `nullptr` on failure (with Python exception
// set).
PythonPtr PartialOrderingToPython(PartialOrdering ordering) {
  if (ordering == PartialOrdering::unordered) {
    Py_INCREF(Py_None);
    return PythonPtr(Py_None);
  }
  const long as_int = ordering < 0 ? -1 : ordering == 0 ? 0 : 1;
  return PythonPtr(PyLong_FromLong(as_int));
}
std::optional PartialOrderingFromPython(PyObject* object) {
if (object == Py_None) return PartialOrdering::unordered;
const long long_value = PyLong_AsLong(object);
if (ABSL_PREDICT_FALSE(long_value == -1) && PyErr_Occurred()) {
return std::nullopt;
}
return riegeli::Compare(long_value, 0);
}
} // namespace riegeli::python
================================================
FILE: python/riegeli/base/utils.h
================================================
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PYTHON_RIEGELI_BASE_UTILS_H_
#define PYTHON_RIEGELI_BASE_UTILS_H_
// From https://docs.python.org/3/c-api/intro.html:
// Since Python may define some pre-processor definitions which affect the
// standard headers on some systems, you must include Python.h before any
// standard headers are included.
#include
// clang-format: do not reorder the above include.
#include
#include
#include
#include
#include
#include
#include
#include "absl/base/attributes.h"
#include "absl/base/optimization.h"
#include "absl/status/status.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "riegeli/base/arithmetic.h"
#include "riegeli/base/assert.h"
#include "riegeli/base/chain.h"
#include "riegeli/base/compare.h"
#include "riegeli/base/types.h"
namespace riegeli::python {
// Ensures that Python GIL is locked. Reentrant.
//
// Same as `PyGILState_Ensure()` / `PyGILState_Release()`.
class PythonLock {
public:
static void AssertHeld() {
RIEGELI_ASSERT(PyGILState_Check()) << "Python GIL was assumed to be held";
}
PythonLock() { gstate_ = PyGILState_Ensure(); }
PythonLock(const PythonLock&) = delete;
PythonLock& operator=(const PythonLock&) = delete;
~PythonLock() { PyGILState_Release(gstate_); }
private:
PyGILState_STATE gstate_;
};
// Unlocks Python GIL, allowing non-Python threads to run.
//
// Same as `Py_BEGIN_ALLOW_THREADS` / `Py_END_ALLOW_THREADS`.
class PythonUnlock {
public:
PythonUnlock() {
PythonLock::AssertHeld();
tstate_ = PyEval_SaveThread();
}
PythonUnlock(const PythonUnlock&) = delete;
PythonUnlock& operator=(const PythonUnlock&) = delete;
~PythonUnlock() { PyEval_RestoreThread(tstate_); }
private:
PyThreadState* tstate_;
};
// Apply a function with Python GIL unlocked, allowing non-Python threads to
// run.
//
// Same as `Py_BEGIN_ALLOW_THREADS` / `Py_END_ALLOW_THREADS`.
template
std::invoke_result_t PythonUnlocked(Function&& f) {
PythonUnlock unlock;
return std::forward(f)();
}
// Owned `PyObject` which assumes that Python GIL is held.
struct Deleter {
template
void operator()(T* ptr) const {
PythonLock::AssertHeld();
Py_DECREF(ptr);
}
};
using PythonPtr = std::unique_ptr;
// Owned `PyObject` which does not assume that Python GIL is held.
struct LockingDeleter {
template
void operator()(T* ptr) const {
PythonLock lock;
Py_DECREF(ptr);
}
};
using PythonPtrLocking = std::unique_ptr;
// Allows a C++ object to be safely embedded in a Python object allocated with
// `PyType_GenericAlloc()`.
//
// `PythonWrapped` is similar to `std::optional`, but:
// * `PythonWrapped` is POD.
// * `PythonWrapped` supports only a subset of `std::optional` API.
// * `PythonWrapped` filled with zero bytes is valid and absent
// (`PyType_GenericAlloc()` fills the Python object with zero bytes).
// * `PythonWrapped` should be explicitly `reset()` in the implementation of
// `tp_dealloc` (there is no C++ destructor).
template
class PythonWrapped {
public:
static_assert(alignof(T) <= alignof(max_align_t),
"PythonWrapped does not support overaligned types");
template
ABSL_ATTRIBUTE_REINITIALIZES void emplace(Args&&... args) {
if (has_value_) {
get()->~T();
} else {
has_value_ = true;
}
new (storage_) T(std::forward(args)...);
}
ABSL_ATTRIBUTE_REINITIALIZES void reset() {
if (has_value_) {
get()->~T();
has_value_ = false;
}
}
bool has_value() const { return has_value_; }
T* get() ABSL_ATTRIBUTE_LIFETIME_BOUND {
RIEGELI_ASSERT(has_value_) << "Object uninitialized";
return std::launder(reinterpret_cast(storage_));
}
const T* get() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
RIEGELI_ASSERT(has_value_) << "Object uninitialized";
return std::launder(reinterpret_cast(storage_));
}
T& operator*() ABSL_ATTRIBUTE_LIFETIME_BOUND { return *get(); }
const T& operator*() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return *get(); }
T* operator->() ABSL_ATTRIBUTE_LIFETIME_BOUND { return get(); }
const T* operator->() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return get(); }
bool Verify() const {
PythonLock::AssertHeld();
if (ABSL_PREDICT_FALSE(!has_value())) {
PyErr_SetString(PyExc_ValueError, "Object uninitialized");
return false;
}
return true;
}
private:
bool has_value_;
alignas(T) char storage_[sizeof(T)];
};
// Represents an optional Python exception being raised.
//
// Stores the (type, value, traceback) triple with locking deleters, so an
// `Exception` may be destroyed without the GIL held.
class Exception {
 public:
  // No exception.
  Exception() = default;

  // Copying shares the underlying Python objects via extra references.
  Exception(const Exception& that) noexcept;
  Exception& operator=(const Exception& that) noexcept;

  Exception(Exception&& that) = default;
  Exception& operator=(Exception&& that) = default;

  // Fetches the active Python exception.
  static Exception Fetch();

  // Restores the active Python exception. The `const&` overload keeps this
  // object intact; the `&&` overload transfers ownership to the interpreter.
  // Both return `nullptr` for convenient use in `return` statements.
  PyObject* Restore() const&;
  PyObject* Restore() &&;

  // Returns `true` if no exception is stored.
  bool ok() const { return type_ == nullptr; }

  // Formats the exception as a human-readable message ("OK" if `ok()`).
  std::string message() const;

  // For implementing `tp_traverse` of objects containing `Exception`.
  int Traverse(visitproc visit, void* arg);

 private:
  // Steals references.
  explicit Exception(PyObject* type, PyObject* value, PyObject* traceback)
      : type_(type), value_(value), traceback_(traceback) {}

  PythonPtrLocking type_;
  PythonPtrLocking value_;
  PythonPtrLocking traceback_;
};
// Translate a failed status to the active Python exception, a class extending
// `RiegeliError`.
void SetRiegeliError(const absl::Status& status);
namespace py_internal {

// Lazily initialized pointer to a Python object, persisting until interpreter
// shutdown.
class StaticObject {
 protected:
  // The cached Python object; `nullptr` until allocated by a derived class.
  mutable PyObject* value_ = nullptr;
  // Next entry in the global list of registered `StaticObject`s.
  mutable const StaticObject* next_ = nullptr;

  // Register this object in a global list of static objects. This must be
  // called when `value_` is allocated, so that it can be freed on interpreter
  // shutdown.
  void RegisterThis() const;

 private:
  friend void FreeStaticObjectsImpl();
};

// Template parameter independent part of `ImportedCapsule`.
class ImportedCapsuleBase {
 public:
  // Forces importing the value, returning `false` on failures (with Python
  // exception set).
  //
  // If `Verify()` returns `true`, `get()` does not die.
  bool Verify() const {
    PythonLock::AssertHeld();
    if (ABSL_PREDICT_FALSE(value_ == nullptr)) return ImportValue();
    return true;
  }

 protected:
  // `capsule_name` must be "module_name.attr_name" and outlive this object.
  explicit constexpr ImportedCapsuleBase(const char* capsule_name)
      : capsule_name_(capsule_name) {}

  ~ImportedCapsuleBase() = default;

  // Imports the capsule and stores its pointer in `value_`.
  bool ImportValue() const;

  // The pointer stored in the capsule; `nullptr` until imported.
  mutable void* value_ = nullptr;

 private:
  const char* capsule_name_;
};

}  // namespace py_internal
// Creates a Python string (type `str`) which persists until interpreter
// shutdown. This is useful for attribute or method names in
// `PyObject_GetAttr()` or `PyObject_CallMethodObjArgs()`.
//
// An instance of `Identifier` should be allocated statically:
// ```
//   static constexpr Identifier id_write("write");
// ```
//
// Then `id_write.get()` is a borrowed reference to the Python object.
class Identifier : public py_internal::StaticObject {
 public:
  // `name` must outlive this object (a string literal in practice).
  explicit constexpr Identifier(absl::string_view name) : name_(name) {}

  // Forces allocating the value, returning `false` on failures (with Python
  // exception set).
  //
  // If `Verify()` returns `true`, `get()` does not die.
  bool Verify() const {
    PythonLock::AssertHeld();
    if (ABSL_PREDICT_FALSE(value_ == nullptr)) return AllocateValue();
    return true;
  }

  // Returns the value, allocating it on the first call. Dies on failure
  // (use `Verify()` to prevent this).
  PyObject* get() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    PythonLock::AssertHeld();
    if (ABSL_PREDICT_FALSE(value_ == nullptr)) {
      RIEGELI_CHECK(AllocateValue()) << Exception::Fetch().message();
    }
    return value_;
  }

 private:
  // Creates and interns the `str` object in `value_`.
  bool AllocateValue() const;

  absl::string_view name_;
};
// Imports a Python module and gets its attribute, which persists until
// interpreter shutdown.
//
// An instance of `ImportedConstant` should be allocated statically:
// ```
//   static constexpr ImportedConstant kRiegeliError(
//       "riegeli.base.riegeli_error", "RiegeliError");
// ```
//
// Then `kRiegeliError.get()` is a borrowed reference to the Python object.
class ImportedConstant : public py_internal::StaticObject {
 public:
  // `module_name` and `attr_name` must outlive this object (string literals
  // in practice).
  explicit constexpr ImportedConstant(absl::string_view module_name,
                                      absl::string_view attr_name)
      : module_name_(module_name), attr_name_(attr_name) {}

  // Forces importing the value, returning `false` on failures (with Python
  // exception set).
  //
  // If `Verify()` returns `true`, `get()` does not die.
  bool Verify() const {
    PythonLock::AssertHeld();
    if (ABSL_PREDICT_FALSE(value_ == nullptr)) return AllocateValue();
    return true;
  }

  // Returns the value, importing it on the first call. Dies on failure
  // (use `Verify()` to prevent this).
  PyObject* get() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    PythonLock::AssertHeld();
    if (ABSL_PREDICT_FALSE(value_ == nullptr)) {
      RIEGELI_CHECK(AllocateValue()) << Exception::Fetch().message();
    }
    return value_;
  }

 private:
  // Imports the module and stores the attribute in `value_`.
  bool AllocateValue() const;

  absl::string_view module_name_;
  absl::string_view attr_name_;
};
// Exports a Python capsule containing a C++ pointer, which should be valid
// forever, by adding it to the given module.
//
// `capsule_name` must be "module_name.attr_name" with `module_name`
// corresponding to `PyModule_GetName(module)`.
//
// Returns `false` on failure (with Python exception set).
bool ExportCapsule(PyObject* module, const char* capsule_name, const void* ptr);
// Imports a Python capsule and gets its stored pointer, which persists forever.
//
// `capsule_name` must be "module_name.attr_name".
//
// An instance of `ImportedCapsule<T>` should be allocated statically:
// ```
//   static constexpr ImportedCapsule<RecordPositionApi> kRecordPositionApi(
//       "riegeli.records.record_position._CPPAPI");
// ```
//
// Then `kRecordPositionApi.get()` is a pointer stored in the capsule.
template <typename T>
class ImportedCapsule : public py_internal::ImportedCapsuleBase {
 public:
  explicit constexpr ImportedCapsule(const char* capsule_name)
      : ImportedCapsuleBase(capsule_name) {}

  // Returns the value, importing it on the first call. Dies on failure
  // (use `Verify()` to prevent this).
  const T* get() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    PythonLock::AssertHeld();
    if (ABSL_PREDICT_FALSE(value_ == nullptr)) {
      RIEGELI_CHECK(ImportValue()) << Exception::Fetch().message();
    }
    return static_cast<const T*>(value_);
  }

  const T& operator*() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return *get(); }
  const T* operator->() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return get(); }
};
// Converts C++ `long` to a Python `int` object.
//
// Returns `nullptr` on failure (with Python exception set).
inline PythonPtr IntToPython(long value) {
  PyObject* const result = PyLong_FromLong(value);
  return PythonPtr(result);
}
// Converts C++ `absl::string_view` to a Python `bytes` object (copying the
// data).
//
// Returns `nullptr` on failure (with Python exception set).
inline PythonPtr BytesToPython(absl::string_view value) {
  return PythonPtr(PyBytes_FromStringAndSize(
      value.data(), IntCast<Py_ssize_t>(value.size())));
}
// Converts C++ array of bytes to a Python `memoryview` object.
//
// Memory is shared. The C++ memory must be valid as long as the Python object
// is needed.
class MemoryView {
 public:
  MemoryView() = default;

  MemoryView(const MemoryView&) = delete;
  MemoryView& operator=(const MemoryView&) = delete;

  // Calls `Release()`, ignoring its result, without disturbing the Python
  // exception state.
  ~MemoryView();

  // Creates and returns a read-only `memoryview` object.
  //
  // Returns `nullptr` on failure (with Python exception set).
  //
  // `ToPython()` or `MutableToPython()` must be called at most once for each
  // `MemoryView` object.
  PyObject* ToPython(absl::string_view value);

  // Creates and returns a mutable `memoryview` object.
  //
  // Returns `nullptr` on failure (with Python exception set).
  //
  // `ToPython()` or `MutableToPython()` must be called at most once for each
  // `MemoryView` object.
  PyObject* MutableToPython(absl::Span<char> value);

  // If a reference to the `memoryview` has been stored elsewhere, calls
  // `memoryview.release()` to mark the `memoryview` as invalid.
  //
  // Returns `false` on failure (with Python exception set).
  bool Release();

 private:
  // Calls `memoryview.release()` on `object_`.
  bool ReleaseInternal();

  PythonPtr object_;
};
// Refers to internals of a Python `bytes`-like object, using the buffer
// protocol.
class BytesLike {
 public:
  BytesLike() noexcept { buffer_.obj = nullptr; }

  BytesLike(const BytesLike&) = delete;
  BytesLike& operator=(const BytesLike&) = delete;

  ~BytesLike() {
    PythonLock::AssertHeld();
    if (buffer_.obj != nullptr) PyBuffer_Release(&buffer_);
  }

  // Converts from a Python object.
  //
  // Returns `false` on failure (with Python exception set).
  //
  // Must be called at most once for each `BytesLike` object.
  bool FromPython(PyObject* object) {
    RIEGELI_ASSERT_EQ(buffer_.obj, nullptr)
        << "Failed precondition of BytesLike::FromPython(): "
           "called more than once";
    return PyObject_GetBuffer(object, &buffer_, PyBUF_CONTIG_RO) == 0;
  }

  // Returns the binary contents. Valid only while this `BytesLike` is alive.
  /*implicit*/ operator absl::string_view() const
      ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return absl::string_view(static_cast<const char*>(buffer_.buf),
                             IntCast<size_t>(buffer_.len));
  }

 private:
  Py_buffer buffer_;
};
// Converts C++ `absl::string_view` to a Python `str` object. Unicode is
// converted from UTF-8.
//
// Returns `nullptr` on failure (with Python exception set).
inline PythonPtr StringToPython(absl::string_view value) {
  return PythonPtr(PyUnicode_FromStringAndSize(
      value.data(), IntCast<Py_ssize_t>(value.size())));
}
// Refers to internals of a Python object representing text. Valid Python
// objects are `str` or `bytes`. Unicode is converted to UTF-8.
class StrOrBytes {
 public:
  StrOrBytes() noexcept {}

  StrOrBytes(const StrOrBytes&) = delete;
  StrOrBytes& operator=(const StrOrBytes&) = delete;

  // Converts from a Python object.
  //
  // Returns `false` on failure (with Python exception set).
  //
  // Must be called at most once for each `StrOrBytes` object.
  bool FromPython(PyObject* object ABSL_ATTRIBUTE_LIFETIME_BOUND);

  // Returns the text contents. The referenced memory is owned by the Python
  // object passed to `FromPython()` and is valid only while it is alive.
  /*implicit*/ operator absl::string_view() const { return data_; }

 private:
  absl::string_view data_;
};
// Converts C++ `Chain` to a Python `bytes` object.
//
// Returns `nullptr` on failure (with Python exception set).
PythonPtr ChainToPython(const Chain& value);
// Converts a Python `bytes`-like object to C++ `Chain`, using the buffer
// protocol.
//
// Returns `std::nullopt` on failure (with Python exception set).
std::optional ChainFromPython(PyObject* object);
// Converts C++ `size_t` to a Python `int` object.
//
// Returns `nullptr` on failure (with Python exception set).
PythonPtr SizeToPython(size_t value);
// Converts a Python object to C++ `size_t`. Valid Python objects are the same
// as for slicing: `int` or objects supporting `__index__()`.
//
// Returns `std::nullopt` on failure (with Python exception set).
std::optional SizeFromPython(PyObject* object);
// Converts C++ `Position` to a Python `int` object.
//
// Returns `nullptr` on failure (with Python exception set).
PythonPtr PositionToPython(Position value);
// Converts a Python object to C++ `Position`. Valid Python objects are the same
// as for slicing: `int` or objects supporting `__index__()`.
//
// Returns `std::nullopt` on failure (with Python exception set).
std::optional PositionFromPython(PyObject* object);
// Converts C++ `PartialOrdering` to a Python `None` (for `unordered`) or `int`
// object (-1 for `less`, 0 for `equivalent`, or 1 for `greater`).
//
// Returns `nullptr` on failure (with Python exception set).
PythonPtr PartialOrderingToPython(PartialOrdering ordering);
// Converts a Python object to C++ `PartialOrdering`. Valid Python objects are
// `int` (compared with 0) or `None`.
//
// Returns `std::nullopt` on failure (with Python exception set).
std::optional PartialOrderingFromPython(PyObject* object);
// Implementation details follow.

// Copying delegates to the copy assignment defined in utils.cc, which adds
// references to the shared Python objects.
inline Exception::Exception(const Exception& that) noexcept { *this = that; }

// Reports the stored Python objects to the garbage collector; for use in
// `tp_traverse` of containing objects.
inline int Exception::Traverse(visitproc visit, void* arg) {
  Py_VISIT(type_.get());
  Py_VISIT(value_.get());
  Py_VISIT(traceback_.get());
  return 0;
}
} // namespace riegeli::python
#endif // PYTHON_RIEGELI_BASE_UTILS_H_
================================================
FILE: python/riegeli/bytes/BUILD
================================================
load("@rules_cc//cc:defs.bzl", "cc_library")
package(
default_visibility = ["//python/riegeli:__subpackages__"],
features = ["header_modules"],
)
licenses(["notice"])
# C++ `Reader` adapter over a Python binary IO object (see python_reader.h).
cc_library(
    name = "python_reader",
    srcs = ["python_reader.cc"],
    hdrs = ["python_reader.h"],
    # python_reader.cc has #define before #include to influence what the
    # included files provide.
    features = ["-use_header_modules"],
    deps = [
        "//python/riegeli/base:utils",
        "//riegeli/base:arithmetic",
        "//riegeli/base:assert",
        "//riegeli/base:global",
        "//riegeli/base:object",
        "//riegeli/base:types",
        "//riegeli/bytes:buffer_options",
        "//riegeli/bytes:buffered_reader",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/numeric:bits",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:string_view",
        "@com_google_absl//absl/types:span",
        "@rules_python//python/cc:current_py_cc_headers",
    ],
)
# C++ `Writer` adapter over a Python binary IO object (see python_writer.h).
cc_library(
    name = "python_writer",
    srcs = ["python_writer.cc"],
    hdrs = ["python_writer.h"],
    # python_writer.cc has #define before #include to influence what the
    # included files provide.
    features = ["-use_header_modules"],
    deps = [
        "//python/riegeli/base:utils",
        "//riegeli/base:arithmetic",
        "//riegeli/base:assert",
        "//riegeli/base:global",
        "//riegeli/base:object",
        "//riegeli/base:types",
        "//riegeli/bytes:buffer_options",
        "//riegeli/bytes:buffered_writer",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/numeric:bits",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:string_view",
        "@rules_python//python/cc:current_py_cc_headers",
    ],
)
================================================
FILE: python/riegeli/bytes/python_reader.cc
================================================
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// From https://docs.python.org/3/c-api/intro.html:
// Since Python may define some pre-processor definitions which affect the
// standard headers on some systems, you must include Python.h before any
// standard headers are included.
#define PY_SSIZE_T_CLEAN
#include
// clang-format: do not reorder the above include.
#include "python/riegeli/bytes/python_reader.h"
// clang-format: do not reorder the above include.
#include
#include
#include
#include
#include "absl/base/optimization.h"
#include "absl/numeric/bits.h"
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "python/riegeli/base/utils.h"
#include "riegeli/base/arithmetic.h"
#include "riegeli/base/assert.h"
#include "riegeli/base/global.h"
#include "riegeli/base/types.h"
#include "riegeli/bytes/buffered_reader.h"
namespace riegeli::python {
// Wraps the Python binary IO object `src`. Determines whether random access is
// supported by calling `seekable()`, and takes the initial position from
// `tell()`; `Options::assumed_pos()` bypasses both.
PythonReader::PythonReader(PyObject* src, Options options)
    : BufferedReader(options.buffer_options()), owns_src_(options.owns_src()) {
  PythonLock::AssertHeld();
  // Share ownership of `src` with the caller.
  Py_INCREF(src);
  src_.reset(src);
  if (options.assumed_pos() != std::nullopt) {
    set_limit_pos(*options.assumed_pos());
    // `supports_random_access_` is left as `false`.
    random_access_status_ = Global([] {
      return absl::UnimplementedError(
          "PythonReader::Options::assumed_pos() excludes random access");
    });
  } else {
    // Ask the file whether it supports seeking.
    static constexpr Identifier id_seekable("seekable");
    const PythonPtr seekable_result(
        PyObject_CallMethodObjArgs(src_.get(), id_seekable.get(), nullptr));
    if (ABSL_PREDICT_FALSE(seekable_result == nullptr)) {
      FailOperation("seekable()");
      return;
    }
    const int seekable_is_true = PyObject_IsTrue(seekable_result.get());
    if (ABSL_PREDICT_FALSE(seekable_is_true < 0)) {
      FailOperation("PyObject_IsTrue() after seekable()");
      return;
    }
    if (seekable_is_true == 0) {
      // Random access is not supported. Assume 0 as the initial position.
      // `supports_random_access_` is left as `false`.
      random_access_status_ = Global([] {
        return absl::UnimplementedError(
            "seekable() is False which excludes random access");
      });
      return;
    }
    // Take the current file position as the initial position.
    static constexpr Identifier id_tell("tell");
    const PythonPtr tell_result(
        PyObject_CallMethodObjArgs(src_.get(), id_tell.get(), nullptr));
    if (ABSL_PREDICT_FALSE(tell_result == nullptr)) {
      FailOperation("tell()");
      return;
    }
    const std::optional<Position> file_pos =
        PositionFromPython(tell_result.get());
    if (ABSL_PREDICT_FALSE(file_pos == std::nullopt)) {
      FailOperation("PositionFromPython() after tell()");
      return;
    }
    set_limit_pos(*file_pos);
    supports_random_access_ = true;
  }
  BeginRun();
}
// Finishes reading and, if this object owns the file, calls its `close()`
// method, failing the reader if `close()` raises.
void PythonReader::Done() {
  BufferedReader::Done();
  random_access_status_ = absl::OkStatus();
  if (src_ == nullptr || !owns_src_) return;
  PythonLock lock;
  static constexpr Identifier id_close("close");
  const PythonPtr close_result(
      PyObject_CallMethodObjArgs(src_.get(), id_close.get(), nullptr));
  if (ABSL_PREDICT_FALSE(close_result == nullptr)) FailOperation("close()");
}
// Fails the reader with an `UnknownError` describing `operation` and the
// active Python exception, which is captured into `exception_`. Always returns
// `false`.
inline bool PythonReader::FailOperation(absl::string_view operation) {
  RIEGELI_ASSERT(is_open())
      << "Failed precondition of PythonReader::FailOperation(): "
         "Object closed";
  PythonLock::AssertHeld();
  if (ABSL_PREDICT_FALSE(!ok())) {
    // Ignore this error because `PythonReader` already failed.
    PyErr_Clear();
    return false;
  }
  exception_ = Exception::Fetch();
  return Fail(absl::UnknownError(
      absl::StrCat(operation, " failed: ", exception_.message())));
}
// Reads between `min_length` and `max_length` bytes into `dest` by calling the
// most capable read method the Python stream provides. Returns `true` once at
// least `min_length` bytes were read; returns `false` on end of stream or
// failure (the two are distinguished by `ok()`).
bool PythonReader::ReadInternal(size_t min_length, size_t max_length,
                                char* dest) {
  RIEGELI_ASSERT_GT(min_length, 0u)
      << "Failed precondition of BufferedReader::ReadInternal(): "
         "nothing to read";
  RIEGELI_ASSERT_GE(max_length, min_length)
      << "Failed precondition of BufferedReader::ReadInternal(): "
         "max_length < min_length";
  RIEGELI_ASSERT_OK(*this)
      << "Failed precondition of BufferedReader::ReadInternal()";
  PythonLock lock;
  // Find a read function to use, preferring in order: `readinto1()`,
  // `readinto()`, `read1()`, `read()`. The chosen bound method is cached in
  // `read_function_` so the lookup happens only on the first read.
  if (ABSL_PREDICT_FALSE(read_function_ == nullptr)) {
    static constexpr Identifier id_readinto1("readinto1");
    read_function_.reset(PyObject_GetAttr(src_.get(), id_readinto1.get()));
    read_function_name_ = "readinto1()";
    if (read_function_ == nullptr) {
      // Only `AttributeError` means "try the next candidate"; any other
      // exception is a real failure.
      if (ABSL_PREDICT_FALSE(!PyErr_ExceptionMatches(PyExc_AttributeError))) {
        return FailOperation(read_function_name_);
      }
      PyErr_Clear();
      static constexpr Identifier id_readinto("readinto");
      read_function_.reset(PyObject_GetAttr(src_.get(), id_readinto.get()));
      read_function_name_ = "readinto()";
      if (read_function_ == nullptr) {
        if (ABSL_PREDICT_FALSE(!PyErr_ExceptionMatches(PyExc_AttributeError))) {
          return FailOperation(read_function_name_);
        }
        PyErr_Clear();
        // Neither `readinto1()` nor `readinto()` exists: fall back to methods
        // which return a new bytes-like object instead of filling a buffer.
        use_bytes_ = true;
        static constexpr Identifier id_read1("read1");
        read_function_.reset(PyObject_GetAttr(src_.get(), id_read1.get()));
        read_function_name_ = "read1()";
        if (read_function_ == nullptr) {
          if (ABSL_PREDICT_FALSE(
                  !PyErr_ExceptionMatches(PyExc_AttributeError))) {
            return FailOperation(read_function_name_);
          }
          PyErr_Clear();
          static constexpr Identifier id_read("read");
          read_function_.reset(PyObject_GetAttr(src_.get(), id_read.get()));
          read_function_name_ = "read()";
          if (ABSL_PREDICT_FALSE(read_function_ == nullptr)) {
            return FailOperation(read_function_name_);
          }
        }
      }
    }
  }
  // A single Python call may read less than requested, so loop until
  // `min_length` is satisfied or the stream ends.
  for (;;) {
    if (ABSL_PREDICT_FALSE(limit_pos() ==
                           std::numeric_limits::max())) {
      return FailOverflow();
    }
    // Clamp the request so that the position cannot overflow and the length
    // fits in the Python size type (the `bit_floor` term keeps it a friendly
    // power of two at the extreme).
    const size_t length_to_read = UnsignedMin(
        max_length, std::numeric_limits::max() - limit_pos(),
        absl::bit_floor(size_t{std::numeric_limits::max()}));
    size_t length_read;
    if (!use_bytes_) {
      PythonPtr read_result;
      {
        // Prefer using `readinto1()` or `readinto()` to avoid copying memory.
        MemoryView memory_view;
        PyObject* const memory_view_object =
            memory_view.MutableToPython(absl::MakeSpan(dest, length_to_read));
        if (ABSL_PREDICT_FALSE(memory_view_object == nullptr)) {
          return FailOperation("MemoryView::MutableToPython()");
        }
        read_result.reset(PyObject_CallFunctionObjArgs(
            read_function_.get(), memory_view_object, nullptr));
        if (ABSL_PREDICT_FALSE(read_result == nullptr)) {
          return FailOperation(read_function_name_);
        }
        // Release the memoryview before leaving the scope; a failure here
        // means the stream kept a reference to the buffer.
        if (ABSL_PREDICT_FALSE(!memory_view.Release())) {
          return FailOperation("MemoryView::Release()");
        }
      }
      // `readinto*()` returns the number of bytes stored.
      const std::optional length_read_opt =
          SizeFromPython(read_result.get());
      if (ABSL_PREDICT_FALSE(length_read_opt == std::nullopt)) {
        return FailOperation(
            absl::StrCat("SizeFromPython() after ", read_function_name_));
      }
      length_read = *length_read_opt;
      // 0 bytes from `readinto*()` means end of stream.
      if (ABSL_PREDICT_FALSE(length_read == 0)) return false;
      if (ABSL_PREDICT_FALSE(length_read > max_length)) {
        return Fail(absl::InternalError(
            absl::StrCat(read_function_name_, " read more than requested")));
      }
    } else {
      // `read1()` / `read()` take the requested length and return a
      // bytes-like object, which must be copied into `dest`.
      const PythonPtr length(SizeToPython(length_to_read));
      if (ABSL_PREDICT_FALSE(length == nullptr)) {
        return FailOperation("SizeToPython()");
      }
      const PythonPtr read_result(PyObject_CallFunctionObjArgs(
          read_function_.get(), length.get(), nullptr));
      if (ABSL_PREDICT_FALSE(read_result == nullptr)) {
        return FailOperation(read_function_name_);
      }
      Py_buffer buffer;
      if (ABSL_PREDICT_FALSE(PyObject_GetBuffer(read_result.get(), &buffer,
                                                PyBUF_CONTIG_RO) < 0)) {
        return FailOperation(
            absl::StrCat("PyObject_GetBuffer() after ", read_function_name_));
      }
      // An empty result means end of stream. `buffer` must be released on
      // every path after a successful `PyObject_GetBuffer()`.
      if (ABSL_PREDICT_FALSE(buffer.len == 0)) {
        PyBuffer_Release(&buffer);
        return false;
      }
      if (ABSL_PREDICT_FALSE(IntCast(buffer.len) > max_length)) {
        PyBuffer_Release(&buffer);
        return Fail(absl::InternalError(
            absl::StrCat(read_function_name_, " read more than requested")));
      }
      std::memcpy(dest, buffer.buf, IntCast(buffer.len));
      length_read = IntCast(buffer.len);
      PyBuffer_Release(&buffer);
    }
    move_limit_pos(length_read);
    if (length_read >= min_length) return true;
    // Short read: advance the destination and try again for the remainder.
    dest += length_read;
    min_length -= length_read;
    max_length -= length_read;
  }
}
// Seeks to `new_pos` which lies outside the current buffer. Without random
// access support, seeking forwards falls back to reading and discarding data,
// while seeking backwards fails with the recorded reason.
bool PythonReader::SeekBehindBuffer(Position new_pos) {
  RIEGELI_ASSERT(new_pos < start_pos() || new_pos > limit_pos())
      << "Failed precondition of BufferedReader::SeekBehindBuffer(): "
         "position in the buffer, use Seek() instead";
  RIEGELI_ASSERT_EQ(start_to_limit(), 0u)
      << "Failed precondition of BufferedReader::SeekBehindBuffer(): "
         "buffer not empty";
  if (ABSL_PREDICT_FALSE(!PythonReader::SupportsRandomAccess())) {
    if (ABSL_PREDICT_FALSE(new_pos < start_pos())) {
      // Cannot go backwards without `seek()`; fail with the stored reason.
      if (ok()) Fail(random_access_status_);
      return false;
    }
    // Forward seek is emulated by `BufferedReader` reading ahead.
    return BufferedReader::SeekBehindBuffer(new_pos);
  }
  if (ABSL_PREDICT_FALSE(!ok())) return false;
  PythonLock lock;
  if (new_pos > limit_pos()) {
    // Seeking forwards.
    const std::optional size = SizeInternal();
    if (ABSL_PREDICT_FALSE(size == std::nullopt)) return false;
    if (ABSL_PREDICT_FALSE(new_pos > *size)) {
      // File ends.
      set_limit_pos(*size);
      return false;
    }
  }
  set_limit_pos(new_pos);
  const PythonPtr file_pos = PositionToPython(limit_pos());
  if (ABSL_PREDICT_FALSE(file_pos == nullptr)) {
    return FailOperation("PositionToPython()");
  }
  static constexpr Identifier id_seek("seek");
  const PythonPtr seek_result(PyObject_CallMethodObjArgs(
      src_.get(), id_seek.get(), file_pos.get(), nullptr));
  if (ABSL_PREDICT_FALSE(seek_result == nullptr)) {
    return FailOperation("seek()");
  }
  return true;
}
// Determines the stream size by seeking to the end (`seek(0, io.SEEK_END)`).
// Leaves the stream positioned at the end; callers are responsible for
// seeking back. Returns `std::nullopt` and fails the reader on error.
inline std::optional PythonReader::SizeInternal() {
  RIEGELI_ASSERT_OK(*this)
      << "Failed precondition of PythonReader::SizeInternal()";
  RIEGELI_ASSERT(PythonReader::SupportsRandomAccess())
      << "Failed precondition of PythonReader::SizeInternal(): "
         "random access not supported";
  PythonLock::AssertHeld();
  // Name of the Python call whose result is being interpreted, for error
  // messages.
  absl::string_view operation;
  const PythonPtr file_pos = PositionToPython(0);
  if (ABSL_PREDICT_FALSE(file_pos == nullptr)) {
    FailOperation("PositionToPython()");
    return std::nullopt;
  }
  const PythonPtr whence = IntToPython(2);  // `io.SEEK_END`
  if (ABSL_PREDICT_FALSE(whence == nullptr)) {
    FailOperation("IntToPython()");
    return std::nullopt;
  }
  static constexpr Identifier id_seek("seek");
  PythonPtr result(PyObject_CallMethodObjArgs(
      src_.get(), id_seek.get(), file_pos.get(), whence.get(), nullptr));
  if (result.get() == Py_None) {
    // Python2 `file.seek()` returns `None`, so `tell()` is needed to get the
    // new position. Python2 is dead, but some classes still behave like that.
    static constexpr Identifier id_tell("tell");
    result.reset(
        PyObject_CallMethodObjArgs(src_.get(), id_tell.get(), nullptr));
    operation = "tell()";
  } else {
    // `io.IOBase.seek()` returns the new position.
    operation = "seek()";
  }
  if (ABSL_PREDICT_FALSE(result == nullptr)) {
    FailOperation(operation);
    return std::nullopt;
  }
  const std::optional size = PositionFromPython(result.get());
  if (ABSL_PREDICT_FALSE(size == std::nullopt)) {
    FailOperation(absl::StrCat("PositionFromPython() after ", operation));
    return std::nullopt;
  }
  return *size;
}
// Returns the stream size, restoring the reading position afterwards
// (`SizeInternal()` leaves the stream at the end). Requires random access.
std::optional PythonReader::SizeImpl() {
  if (ABSL_PREDICT_FALSE(!PythonReader::SupportsRandomAccess())) {
    if (ok()) Fail(random_access_status_);
    return std::nullopt;
  }
  if (ABSL_PREDICT_FALSE(!ok())) return std::nullopt;
  PythonLock lock;
  const std::optional size = SizeInternal();
  if (ABSL_PREDICT_FALSE(size == std::nullopt)) return std::nullopt;
  // Seek back to the logical reading position.
  const PythonPtr file_pos = PositionToPython(limit_pos());
  if (ABSL_PREDICT_FALSE(file_pos == nullptr)) {
    FailOperation("PositionToPython()");
    return std::nullopt;
  }
  static constexpr Identifier id_seek("seek");
  const PythonPtr seek_result(PyObject_CallMethodObjArgs(
      src_.get(), id_seek.get(), file_pos.get(), nullptr));
  if (ABSL_PREDICT_FALSE(seek_result == nullptr)) {
    FailOperation("seek()");
    return std::nullopt;
  }
  return *size;
}
} // namespace riegeli::python
================================================
FILE: python/riegeli/bytes/python_reader.h
================================================
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PYTHON_RIEGELI_BYTES_PYTHON_READER_H_
#define PYTHON_RIEGELI_BYTES_PYTHON_READER_H_
// From https://docs.python.org/3/c-api/intro.html:
// Since Python may define some pre-processor definitions which affect the
// standard headers on some systems, you must include Python.h before any
// standard headers are included.
#include
// clang-format: do not reorder the above include.
#include
#include
#include
#include "absl/base/attributes.h"
#include "absl/status/status.h"
#include "absl/strings/string_view.h"
#include "python/riegeli/base/utils.h"
#include "riegeli/base/object.h"
#include "riegeli/base/types.h"
#include "riegeli/bytes/buffer_options.h"
#include "riegeli/bytes/buffered_reader.h"
namespace riegeli::python {
// A `Reader` which reads from a Python binary I/O stream.
//
// The stream must support:
// * `close()` - for `Close()` if `Options::owns_src()`
// * `readinto1(memoryview)` or
// `readinto(memoryview)` or
// `read1(int)` or
// `read(int)`
// * `seekable()`
// * `seek(int[, int])` - for `Seek()` or `Size()`
// * `tell()` - for `Seek()` or `Size()`
//
// `PythonReader` supports random access if
// `Options::assumed_pos() == std::nullopt` and the stream supports random
// access (this is checked by calling `seekable()`).
//
// Warning: if random access is not supported and the stream is not owned,
// it will have an unpredictable amount of extra data consumed because of
// buffering.
class PythonReader : public BufferedReader {
 public:
  class Options : public BufferOptionsBase {
   public:
    Options() noexcept {}

    // If `true`, `PythonReader::Close()` closes the stream.
    //
    // Default: `false`.
    Options& set_owns_src(bool owns_src) & ABSL_ATTRIBUTE_LIFETIME_BOUND {
      owns_src_ = owns_src;
      return *this;
    }
    Options&& set_owns_src(bool owns_src) && ABSL_ATTRIBUTE_LIFETIME_BOUND {
      return std::move(set_owns_src(owns_src));
    }
    bool owns_src() const { return owns_src_; }

    // If `std::nullopt`, the current position reported by `pos()` corresponds
    // to the current stream position if possible, otherwise 0 is assumed as the
    // initial position. Random access is supported if the stream supports
    // random access.
    //
    // If not `std::nullopt`, this position is assumed initially, to be reported
    // by `pos()`. It does not need to correspond to the current stream
    // position. Random access is not supported.
    //
    // Default: `std::nullopt`.
    Options& set_assumed_pos(std::optional assumed_pos) &
        ABSL_ATTRIBUTE_LIFETIME_BOUND {
      assumed_pos_ = assumed_pos;
      return *this;
    }
    Options&& set_assumed_pos(std::optional assumed_pos) &&
        ABSL_ATTRIBUTE_LIFETIME_BOUND {
      return std::move(set_assumed_pos(assumed_pos));
    }
    std::optional assumed_pos() const { return assumed_pos_; }

   private:
    bool owns_src_ = false;
    std::optional assumed_pos_;
  };

  // Creates a closed `PythonReader`.
  explicit PythonReader(Closed) noexcept : BufferedReader(kClosed) {}

  // Will read from `src`.
  explicit PythonReader(PyObject* src, Options options = Options());

  PythonReader(PythonReader&& that) noexcept;
  PythonReader& operator=(PythonReader&& that) noexcept;

  // Returns a borrowed reference to the stream being read from.
  PyObject* src() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return src_.get(); }

  // The Python exception which caused the last failure, if any; can be
  // re-raised to Python callers.
  const Exception& exception() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return exception_;
  }

  // Reading ahead is harmless when the position can be restored with `seek()`.
  bool ToleratesReadingAhead() override {
    return BufferedReader::ToleratesReadingAhead() ||
           PythonReader::SupportsRandomAccess();
  }
  bool SupportsRandomAccess() override { return supports_random_access_; }

  // For implementing `tp_traverse` of objects containing `PythonReader`.
  int Traverse(visitproc visit, void* arg);

 protected:
  void Done() override;
  bool ReadInternal(size_t min_length, size_t max_length, char* dest) override;
  bool SeekBehindBuffer(Position new_pos) override;
  std::optional SizeImpl() override;

 private:
  ABSL_ATTRIBUTE_COLD bool FailOperation(absl::string_view operation);
  std::optional SizeInternal();

  // Owned reference to the Python stream (refcounting done under the GIL).
  PythonPtrLocking src_;
  bool owns_src_ = false;
  bool supports_random_access_ = false;
  // Why random access is unsupported; cleared in `Done()`.
  absl::Status random_access_status_;
  // Last Python exception which caused a failure.
  Exception exception_;
  // Cached bound read method of `src_`, chosen on the first read.
  PythonPtrLocking read_function_;
  // Name of `read_function_` for error messages; points to a string literal.
  absl::string_view read_function_name_;
  // `true` if the read method returns bytes instead of filling a buffer.
  bool use_bytes_ = false;
};
// Move constructor. `std::exchange` resets `supports_random_access_` in the
// moved-from object so that it behaves like a freshly closed reader.
inline PythonReader::PythonReader(PythonReader&& that) noexcept
    : BufferedReader(static_cast(that)),
      src_(std::move(that.src_)),
      owns_src_(that.owns_src_),
      supports_random_access_(
          std::exchange(that.supports_random_access_, false)),
      random_access_status_(std::move(that.random_access_status_)),
      exception_(std::move(that.exception_)),
      read_function_(std::move(that.read_function_)),
      read_function_name_(that.read_function_name_),
      use_bytes_(that.use_bytes_) {}
// Move assignment; mirrors the move constructor field by field.
inline PythonReader& PythonReader::operator=(PythonReader&& that) noexcept {
  BufferedReader::operator=(static_cast(that));
  src_ = std::move(that.src_);
  owns_src_ = that.owns_src_;
  supports_random_access_ = std::exchange(that.supports_random_access_, false);
  random_access_status_ = std::move(that.random_access_status_);
  exception_ = std::move(that.exception_);
  read_function_ = std::move(that.read_function_);
  read_function_name_ = that.read_function_name_;
  use_bytes_ = that.use_bytes_;
  return *this;
}
// Visits all Python objects held by this reader, for cyclic garbage
// collection (`tp_traverse`). `Py_VISIT` returns early on a nonzero result.
inline int PythonReader::Traverse(visitproc visit, void* arg) {
  Py_VISIT(src_.get());
  Py_VISIT(read_function_.get());
  return exception_.Traverse(visit, arg);
}
} // namespace riegeli::python
#endif // PYTHON_RIEGELI_BYTES_PYTHON_READER_H_
================================================
FILE: python/riegeli/bytes/python_writer.cc
================================================
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// From https://docs.python.org/3/c-api/intro.html:
// Since Python may define some pre-processor definitions which affect the
// standard headers on some systems, you must include Python.h before any
// standard headers are included.
#define PY_SSIZE_T_CLEAN
#include
// clang-format: do not reorder the above include.
#include "python/riegeli/bytes/python_writer.h"
// clang-format: do not reorder the above include.
#include
#include
#include
#include "absl/base/attributes.h"
#include "absl/base/optimization.h"
#include "absl/numeric/bits.h"
#include "absl/status/status.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "python/riegeli/base/utils.h"
#include "riegeli/base/arithmetic.h"
#include "riegeli/base/assert.h"
#include "riegeli/base/global.h"
#include "riegeli/base/types.h"
#include "riegeli/bytes/buffered_writer.h"
namespace riegeli::python {
// Constructs a `PythonWriter` writing to `dest` (a Python binary I/O stream).
// Must be called with the GIL held. Determines whether random access is
// supported by calling `seekable()`, and initializes the starting position
// either from `options.assumed_pos()` or from `tell()`.
PythonWriter::PythonWriter(PyObject* dest, Options options)
    : BufferedWriter(options.buffer_options()),
      owns_dest_(options.owns_dest()) {
  PythonLock::AssertHeld();
  // Take a new strong reference to the stream.
  Py_INCREF(dest);
  dest_.reset(dest);
  if (options.assumed_pos() != std::nullopt) {
    set_start_pos(*options.assumed_pos());
    // `supports_random_access_` is left as `false`.
    random_access_status_ = Global([] {
      return absl::UnimplementedError(
          "PythonWriter::Options::assumed_pos() excludes random access");
    });
  } else {
    static constexpr Identifier id_seekable("seekable");
    const PythonPtr seekable_result(
        PyObject_CallMethodObjArgs(dest_.get(), id_seekable.get(), nullptr));
    if (ABSL_PREDICT_FALSE(seekable_result == nullptr)) {
      FailOperation("seekable()");
      return;
    }
    const int seekable_is_true = PyObject_IsTrue(seekable_result.get());
    if (ABSL_PREDICT_FALSE(seekable_is_true < 0)) {
      FailOperation("PyObject_IsTrue() after seekable()");
      return;
    }
    if (seekable_is_true == 0) {
      // Random access is not supported. Assume 0 as the initial position.
      // `supports_random_access_` is left as `false`.
      random_access_status_ = Global([] {
        return absl::UnimplementedError(
            "seekable() is False which excludes random access");
      });
      return;
    }
    // The stream is seekable: start from its current position.
    static constexpr Identifier id_tell("tell");
    const PythonPtr tell_result(
        PyObject_CallMethodObjArgs(dest_.get(), id_tell.get(), nullptr));
    if (ABSL_PREDICT_FALSE(tell_result == nullptr)) {
      FailOperation("tell()");
      return;
    }
    const std::optional file_pos =
        PositionFromPython(tell_result.get());
    if (ABSL_PREDICT_FALSE(file_pos == std::nullopt)) {
      FailOperation("PositionFromPython() after tell()");
      return;
    }
    set_start_pos(*file_pos);
    supports_random_access_ = true;
  }
  BeginRun();
}
// Finishes writing and, if this writer owns the stream, calls its `close()`
// method under the GIL. Any failure of `close()` fails the writer.
void PythonWriter::Done() {
  BufferedWriter::Done();
  // The stream can no longer be used, so the reason why random access is
  // unsupported is no longer interesting; free the status.
  random_access_status_ = absl::OkStatus();
  if (owns_dest_ && dest_ != nullptr) {
    PythonLock lock;
    static constexpr Identifier id_close("close");
    const PythonPtr close_result(
        PyObject_CallMethodObjArgs(dest_.get(), id_close.get(), nullptr));
    if (ABSL_PREDICT_FALSE(close_result == nullptr)) FailOperation("close()");
  }
}
// Turns the pending Python exception into a failure of this `PythonWriter`,
// recording `operation` in the status message. Must be called with the GIL
// held and a Python exception set. Always returns `false`, so callers can
// `return FailOperation(...)` directly.
inline bool PythonWriter::FailOperation(absl::string_view operation) {
  RIEGELI_ASSERT(is_open())
      << "Failed precondition of PythonWriter::FailOperation(): "
         "Object closed";
  PythonLock::AssertHeld();
  if (ABSL_PREDICT_FALSE(!ok())) {
    // Ignore this error because `PythonWriter` already failed.
    PyErr_Clear();
    return false;
  }
  // Capture the exception object so that it can later be re-raised to Python
  // callers via `exception()`, and summarize it in the writer's status.
  exception_ = Exception::Fetch();
  return Fail(absl::UnknownError(
      absl::StrCat(operation, " failed: ", exception_.message())));
}
// Writes all of `src` to the stream by calling its `write()` method, first
// with a `memoryview` to avoid copying, falling back to `bytes` if the stream
// rejects `memoryview` with `TypeError`. Returns `false` on failure.
bool PythonWriter::WriteInternal(absl::string_view src) {
  RIEGELI_ASSERT(!src.empty())
      << "Failed precondition of BufferedWriter::WriteInternal(): "
         "nothing to write";
  RIEGELI_ASSERT_OK(*this)
      << "Failed precondition of BufferedWriter::WriteInternal()";
  if (ABSL_PREDICT_FALSE(src.size() >
                         std::numeric_limits::max() - start_pos())) {
    return FailOverflow();
  }
  PythonLock lock;
  // Look up and cache the bound `write()` method on the first write.
  if (ABSL_PREDICT_FALSE(write_function_ == nullptr)) {
    static constexpr Identifier id_write("write");
    write_function_.reset(PyObject_GetAttr(dest_.get(), id_write.get()));
    if (ABSL_PREDICT_FALSE(write_function_ == nullptr)) {
      return FailOperation("write()");
    }
  }
  // `write()` may accept less than the whole request at once, so loop until
  // everything is written.
  do {
    // Clamp the chunk so its length fits in the Python size type.
    const size_t length_to_write = UnsignedMin(
        src.size(),
        absl::bit_floor(size_t{std::numeric_limits::max()}));
    size_t length_written;
    {
      PythonPtr write_result;
      if (!use_bytes_) {
        // Prefer passing a `memoryview` to avoid copying memory.
        MemoryView memory_view;
        PyObject* const memory_view_object = memory_view.ToPython(
            absl::string_view(src.data(), length_to_write));
        if (ABSL_PREDICT_FALSE(memory_view_object == nullptr)) {
          return FailOperation("MemoryView::ToPython()");
        }
        write_result.reset(PyObject_CallFunctionObjArgs(
            write_function_.get(), memory_view_object, nullptr));
        if (ABSL_PREDICT_FALSE(write_result == nullptr)) {
          // `TypeError` means the stream does not accept `memoryview`;
          // remember to use `bytes` from now on. Anything else is a failure.
          if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
            return FailOperation("write()");
          }
          PyErr_Clear();
          use_bytes_ = true;
        }
        // Release the memoryview; a failure here means the stream kept a
        // reference to the buffer.
        if (ABSL_PREDICT_FALSE(!memory_view.Release())) {
          return FailOperation("MemoryView::Release()");
        }
      }
      if (use_bytes_) {
        // `write()` does not support `memoryview`. Use `bytes`.
        const PythonPtr bytes = BytesToPython(src.substr(0, length_to_write));
        if (ABSL_PREDICT_FALSE(bytes == nullptr)) {
          return FailOperation("BytesToPython()");
        }
        write_result.reset(PyObject_CallFunctionObjArgs(write_function_.get(),
                                                        bytes.get(), nullptr));
        if (ABSL_PREDICT_FALSE(write_result == nullptr)) {
          return FailOperation("write()");
        }
      }
      if (write_result.get() == Py_None) {
        // Python2 `file.write()` returns `None`, and would raise an exception
        // if less than the full length had been written. Python2 is dead, but
        // some classes still behave like that.
        length_written = length_to_write;
      } else {
        // `io.IOBase.write()` returns the length written.
        const std::optional length_written_opt =
            SizeFromPython(write_result.get());
        if (ABSL_PREDICT_FALSE(length_written_opt == std::nullopt)) {
          return FailOperation("SizeFromPython() after write()");
        }
        length_written = *length_written_opt;
      }
    }
    if (ABSL_PREDICT_FALSE(length_written > length_to_write)) {
      return Fail(absl::InternalError("write() wrote more than requested"));
    }
    move_start_pos(length_written);
    src.remove_prefix(length_written);
  } while (!src.empty());
  return true;
}
// Flushes buffered data and then calls the stream's `flush()` method.
// For `FlushType::kFromObject` the Python-level flush is skipped unless this
// writer owns the stream, since the stream owner may flush it themselves.
bool PythonWriter::FlushImpl(FlushType flush_type) {
  if (ABSL_PREDICT_FALSE(!BufferedWriter::FlushImpl(flush_type))) return false;
  switch (flush_type) {
    case FlushType::kFromObject:
      if (!owns_dest_) return true;
      ABSL_FALLTHROUGH_INTENDED;
    case FlushType::kFromProcess:
    case FlushType::kFromMachine:
      PythonLock lock;
      static constexpr Identifier id_flush("flush");
      const PythonPtr flush_result(
          PyObject_CallMethodObjArgs(dest_.get(), id_flush.get(), nullptr));
      if (ABSL_PREDICT_FALSE(flush_result == nullptr)) {
        return FailOperation("flush()");
      }
      return true;
  }
  RIEGELI_ASSUME_UNREACHABLE()
      << "Unknown flush type: " << static_cast(flush_type);
}
// Seeks to `new_pos`, which is outside the current buffer. Requires random
// access support; clamps to the file size when seeking past the end.
bool PythonWriter::SeekBehindBuffer(Position new_pos) {
  RIEGELI_ASSERT_NE(new_pos, pos())
      << "Failed precondition of BufferedWriter::SeekBehindBuffer(): "
         "position unchanged, use Seek() instead";
  RIEGELI_ASSERT_EQ(start_to_limit(), 0u)
      << "Failed precondition of BufferedWriter::SeekBehindBuffer(): "
         "buffer not empty";
  if (ABSL_PREDICT_FALSE(!PythonWriter::SupportsRandomAccess())) {
    if (ok()) Fail(random_access_status_);
    return false;
  }
  PythonLock lock;
  if (new_pos > start_pos()) {
    // Seeking forwards.
    const std::optional size = SizeInternal();
    if (ABSL_PREDICT_FALSE(size == std::nullopt)) return false;
    if (ABSL_PREDICT_FALSE(new_pos > *size)) {
      // File ends.
      set_start_pos(*size);
      return false;
    }
  }
  set_start_pos(new_pos);
  const PythonPtr file_pos = PositionToPython(start_pos());
  if (ABSL_PREDICT_FALSE(file_pos == nullptr)) {
    return FailOperation("PositionToPython()");
  }
  static constexpr Identifier id_seek("seek");
  const PythonPtr seek_result(PyObject_CallMethodObjArgs(
      dest_.get(), id_seek.get(), file_pos.get(), nullptr));
  if (ABSL_PREDICT_FALSE(seek_result == nullptr)) {
    return FailOperation("seek()");
  }
  return true;
}
// Determines the stream size by seeking to the end (`seek(0, io.SEEK_END)`).
// Leaves the stream positioned at the end; callers are responsible for
// seeking back. Returns `std::nullopt` and fails the writer on error.
inline std::optional PythonWriter::SizeInternal() {
  RIEGELI_ASSERT_OK(*this)
      << "Failed precondition of PythonWriter::SizeInternal()";
  RIEGELI_ASSERT(PythonWriter::SupportsRandomAccess())
      << "Failed precondition of PythonWriter::SizeInternal(): "
         "random access not supported";
  RIEGELI_ASSERT_EQ(start_to_limit(), 0u)
      << "Failed precondition of PythonWriter::SizeInternal(): "
         "buffer not empty";
  PythonLock::AssertHeld();
  // Name of the Python call whose result is being interpreted, for error
  // messages.
  absl::string_view operation;
  const PythonPtr file_pos = PositionToPython(0);
  if (ABSL_PREDICT_FALSE(file_pos == nullptr)) {
    FailOperation("PositionToPython()");
    return std::nullopt;
  }
  const PythonPtr whence = IntToPython(2);  // `io.SEEK_END`
  if (ABSL_PREDICT_FALSE(whence == nullptr)) {
    FailOperation("IntToPython()");
    return std::nullopt;
  }
  static constexpr Identifier id_seek("seek");
  PythonPtr result(PyObject_CallMethodObjArgs(
      dest_.get(), id_seek.get(), file_pos.get(), whence.get(), nullptr));
  if (result.get() == Py_None) {
    // Python2 `file.seek()` returns `None`. Python2 is dead, but some classes
    // still behave like that.
    static constexpr Identifier id_tell("tell");
    result.reset(
        PyObject_CallMethodObjArgs(dest_.get(), id_tell.get(), nullptr));
    operation = "tell()";
  } else {
    // `io.IOBase.seek()` returns the new position.
    operation = "seek()";
  }
  if (ABSL_PREDICT_FALSE(result == nullptr)) {
    FailOperation(operation);
    return std::nullopt;
  }
  const std::optional size = PositionFromPython(result.get());
  if (ABSL_PREDICT_FALSE(size == std::nullopt)) {
    FailOperation(absl::StrCat("PositionFromPython() after ", operation));
    return std::nullopt;
  }
  return *size;
}
// Returns the stream size, restoring the writing position afterwards
// (`SizeInternal()` leaves the stream at the end). Requires random access.
std::optional PythonWriter::SizeBehindBuffer() {
  RIEGELI_ASSERT_EQ(start_to_limit(), 0u)
      << "Failed precondition of BufferedWriter::SizeBehindBuffer(): "
         "buffer not empty";
  if (ABSL_PREDICT_FALSE(!PythonWriter::SupportsRandomAccess())) {
    if (ok()) Fail(random_access_status_);
    return std::nullopt;
  }
  if (ABSL_PREDICT_FALSE(!ok())) return std::nullopt;
  PythonLock lock;
  const std::optional size = SizeInternal();
  if (ABSL_PREDICT_FALSE(size == std::nullopt)) return std::nullopt;
  // Seek back to the logical writing position.
  const PythonPtr file_pos = PositionToPython(start_pos());
  if (ABSL_PREDICT_FALSE(file_pos == nullptr)) {
    FailOperation("PositionToPython()");
    return std::nullopt;
  }
  static constexpr Identifier id_seek("seek");
  const PythonPtr seek_result(PyObject_CallMethodObjArgs(
      dest_.get(), id_seek.get(), file_pos.get(), nullptr));
  if (ABSL_PREDICT_FALSE(seek_result == nullptr)) {
    FailOperation("seek()");
    return std::nullopt;
  }
  return *size;
}
// Truncates the stream to `new_size` by seeking there and calling
// `truncate()` (with no argument, so it truncates at the current position).
// Returns `false` if `new_size` is beyond the end of the stream or on error.
bool PythonWriter::TruncateBehindBuffer(Position new_size) {
  RIEGELI_ASSERT_EQ(start_to_limit(), 0u)
      << "Failed precondition of BufferedWriter::TruncateBehindBuffer(): "
         "buffer not empty";
  if (ABSL_PREDICT_FALSE(!PythonWriter::SupportsRandomAccess())) {
    if (ok()) Fail(random_access_status_);
    return false;
  }
  if (ABSL_PREDICT_FALSE(!ok())) return false;
  PythonLock lock;
  const std::optional size = SizeInternal();
  if (ABSL_PREDICT_FALSE(size == std::nullopt)) return false;
  if (ABSL_PREDICT_FALSE(new_size > *size)) {
    // File ends.
    set_start_pos(*size);
    return false;
  }
  {
    // Position the stream at the truncation point; argument-less `truncate()`
    // cuts at the current position.
    const PythonPtr file_pos = PositionToPython(new_size);
    if (ABSL_PREDICT_FALSE(file_pos == nullptr)) {
      return FailOperation("PositionToPython()");
    }
    static constexpr Identifier id_seek("seek");
    const PythonPtr seek_result(PyObject_CallMethodObjArgs(
        dest_.get(), id_seek.get(), file_pos.get(), nullptr));
    if (ABSL_PREDICT_FALSE(seek_result == nullptr)) {
      return FailOperation("seek()");
    }
  }
  set_start_pos(new_size);
  static constexpr Identifier id_truncate("truncate");
  const PythonPtr truncate_result(
      PyObject_CallMethodObjArgs(dest_.get(), id_truncate.get(), nullptr));
  if (ABSL_PREDICT_FALSE(truncate_result == nullptr)) {
    return FailOperation("truncate()");
  }
  return true;
}
} // namespace riegeli::python
================================================
FILE: python/riegeli/bytes/python_writer.h
================================================
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PYTHON_RIEGELI_BYTES_PYTHON_WRITER_H_
#define PYTHON_RIEGELI_BYTES_PYTHON_WRITER_H_
// From https://docs.python.org/3/c-api/intro.html:
// Since Python may define some pre-processor definitions which affect the
// standard headers on some systems, you must include Python.h before any
// standard headers are included.
#include
// clang-format: do not reorder the above include.
#include
#include
#include "absl/base/attributes.h"
#include "absl/status/status.h"
#include "absl/strings/string_view.h"
#include "python/riegeli/base/utils.h"
#include "riegeli/base/object.h"
#include "riegeli/base/types.h"
#include "riegeli/bytes/buffer_options.h"
#include "riegeli/bytes/buffered_writer.h"
namespace riegeli::python {
// A `Writer` which writes to a Python binary I/O stream.
//
// The stream must support:
// * `close()` - for `Close()` if `Options::owns_dest()`
// * `write(bytes)`
// * `flush()` - for `Flush()`
// * `seekable()`
// * `seek(int[, int])` - for `Seek()`, `Size()`, or `Truncate()`
// * `tell()` - for `Seek()`, `Size()`, or `Truncate()`
// * `truncate()` - for `Truncate()`
//
// `PythonWriter` supports random access if
// `Options::assumed_pos() == std::nullopt` and the stream supports random
// access (this is checked by calling `seekable()`).
class PythonWriter : public BufferedWriter {
 public:
  class Options : public BufferOptionsBase {
   public:
    Options() noexcept {}

    // If `true`, `PythonWriter::Close()` closes the stream, and
    // `PythonWriter::Flush(flush_type)` flushes the stream even if `flush_type`
    // is `FlushType::kFromObject`.
    //
    // Default: `false`.
    Options& set_owns_dest(bool owns_dest) & ABSL_ATTRIBUTE_LIFETIME_BOUND {
      owns_dest_ = owns_dest;
      return *this;
    }
    Options&& set_owns_dest(bool owns_dest) && ABSL_ATTRIBUTE_LIFETIME_BOUND {
      return std::move(set_owns_dest(owns_dest));
    }
    bool owns_dest() const { return owns_dest_; }

    // If `std::nullopt`, the current position reported by `pos()` corresponds
    // to the current stream position if possible, otherwise 0 is assumed as the
    // initial position. Random access is supported if the stream supports
    // random access.
    //
    // If not `std::nullopt`, this position is assumed initially, to be reported
    // by `pos()`. It does not need to correspond to the current stream
    // position. Random access is not supported.
    //
    // Default: `std::nullopt`.
    Options& set_assumed_pos(std::optional assumed_pos) &
        ABSL_ATTRIBUTE_LIFETIME_BOUND {
      assumed_pos_ = assumed_pos;
      return *this;
    }
    Options&& set_assumed_pos(std::optional assumed_pos) &&
        ABSL_ATTRIBUTE_LIFETIME_BOUND {
      return std::move(set_assumed_pos(assumed_pos));
    }
    std::optional assumed_pos() const { return assumed_pos_; }

   private:
    bool owns_dest_ = false;
    std::optional assumed_pos_;
  };

  // Creates a closed `PythonWriter`.
  explicit PythonWriter(Closed) noexcept : BufferedWriter(kClosed) {}

  // Will write to `dest`.
  explicit PythonWriter(PyObject* dest, Options options = Options());

  PythonWriter(PythonWriter&& that) noexcept;
  PythonWriter& operator=(PythonWriter&& that) noexcept;

  // Returns a borrowed reference to the stream being written to.
  PyObject* dest() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return dest_.get(); }

  // The Python exception which caused the last failure, if any; can be
  // re-raised to Python callers.
  const Exception& exception() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return exception_;
  }
  bool SupportsRandomAccess() override { return supports_random_access_; }

  // For implementing `tp_traverse` of objects containing `PythonWriter`.
  int Traverse(visitproc visit, void* arg);

 protected:
  void Done() override;
  bool WriteInternal(absl::string_view src) override;
  bool FlushImpl(FlushType flush_type) override;
  bool SeekBehindBuffer(Position new_pos) override;
  std::optional SizeBehindBuffer() override;
  bool TruncateBehindBuffer(Position new_size) override;

 private:
  ABSL_ATTRIBUTE_COLD bool FailOperation(absl::string_view operation);
  std::optional SizeInternal();

  // Owned reference to the Python stream (refcounting done under the GIL).
  PythonPtrLocking dest_;
  bool owns_dest_ = false;
  bool supports_random_access_ = false;
  // Why random access is unsupported; cleared in `Done()`.
  absl::Status random_access_status_;
  // Last Python exception which caused a failure.
  Exception exception_;
  // Cached bound `write()` method of `dest_`, looked up on the first write.
  PythonPtrLocking write_function_;
  // `true` once the stream rejected `memoryview`, forcing `bytes` arguments.
  bool use_bytes_ = false;
};
// Move constructor. `std::exchange` resets `supports_random_access_` in the
// moved-from object so that it behaves like a freshly closed writer.
inline PythonWriter::PythonWriter(PythonWriter&& that) noexcept
    : BufferedWriter(static_cast(that)),
      dest_(std::move(that.dest_)),
      owns_dest_(that.owns_dest_),
      supports_random_access_(
          std::exchange(that.supports_random_access_, false)),
      random_access_status_(std::move(that.random_access_status_)),
      exception_(std::move(that.exception_)),
      write_function_(std::move(that.write_function_)),
      use_bytes_(that.use_bytes_) {}
// Move assignment; mirrors the move constructor field by field.
inline PythonWriter& PythonWriter::operator=(PythonWriter&& that) noexcept {
  BufferedWriter::operator=(static_cast(that));
  dest_ = std::move(that.dest_);
  owns_dest_ = that.owns_dest_;
  supports_random_access_ = std::exchange(that.supports_random_access_, false);
  random_access_status_ = std::move(that.random_access_status_);
  exception_ = std::move(that.exception_);
  write_function_ = std::move(that.write_function_);
  use_bytes_ = that.use_bytes_;
  return *this;
}
}
// Visits all Python objects held by this writer, for cyclic garbage
// collection (`tp_traverse`). `Py_VISIT` returns early on a nonzero result.
inline int PythonWriter::Traverse(visitproc visit, void* arg) {
  Py_VISIT(dest_.get());
  Py_VISIT(write_function_.get());
  return exception_.Traverse(visit, arg);
}
} // namespace riegeli::python
#endif // PYTHON_RIEGELI_BYTES_PYTHON_WRITER_H_
================================================
FILE: python/riegeli/py_extension.bzl
================================================
"""Supports writing Python modules in C++."""
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
load("@rules_python//python:defs.bzl", "py_library")
def py_extension(
        name = None,
        srcs = None,
        hdrs = None,
        data = None,
        features = None,
        visibility = None,
        deps = None):
    """Creates a Python module implemented in C++.

    Python modules can depend on a py_extension. Other py_extensions can
    depend on a generated C++ library named with a "_cc" suffix.

    Args:
      name: Name for this target.
      srcs: C++ source files.
      hdrs: C++ header files, for other py_extensions which depend on this.
      data: Files needed at runtime. This may include Python libraries.
      features: Passed to cc_library.
      visibility: Controls which rules can depend on this.
      deps: Other C++ libraries that this library depends upon.
    """
    library_target = name + "_cc"
    shared_object = name + ".so"

    # C++ library that other py_extensions may depend upon directly.
    cc_library(
        name = library_target,
        srcs = srcs,
        hdrs = hdrs,
        data = data,
        features = features,
        visibility = visibility,
        deps = deps,
        alwayslink = True,
    )

    # Shared object loadable by the Python interpreter.
    cc_binary(
        name = shared_object,
        linkshared = True,
        linkstatic = True,
        visibility = ["//visibility:private"],
        deps = [library_target],
    )

    # Python-facing target carrying the shared object as a data dependency.
    py_library(
        name = name,
        data = [shared_object],
        visibility = visibility,
    )
================================================
FILE: python/riegeli/python_configure.bzl
================================================
"""Repository rule for Python autoconfiguration.
`python_configure` depends on the following environment variables:
* `PYTHON_BIN_PATH`: Location of the python binary.
* `PYTHON_LIB_PATH`: Location of the python libraries.
"""
# Names of the environment variables consulted by the repository rule below.
_BAZEL_SH = "BAZEL_SH"  # Path to bash, used to run probe commands.
_PYTHON_BIN_PATH = "PYTHON_BIN_PATH"  # Location of the python binary.
_PYTHON_LIB_PATH = "PYTHON_LIB_PATH"  # Location of python libraries.
_TF_PYTHON_CONFIG_REPO = "TF_PYTHON_CONFIG_REPO"  # Remotely configured repo.
def _tpl(repository_ctx, tpl, substitutions = {}, out = None):
    """Instantiates a template from //python/riegeli into this repository.

    Args:
      repository_ctx: The repository_ctx object.
      tpl: Base name of the "<tpl>.tpl" template file.
      substitutions: Placeholder-to-value mapping applied to the template.
      out: Output path; defaults to the template base name.
    """
    destination = out or tpl
    template_label = Label("//python/riegeli:{}.tpl".format(tpl))
    repository_ctx.template(destination, template_label, substitutions)
def _fail(msg):
    """Aborts the build with a red-highlighted Python configuration error."""
    fail("{}Python Configuration Error:{} {}\n".format(
        "\033[0;31m",  # ANSI red.
        "\033[0m",  # ANSI reset.
        msg,
    ))
def _is_windows(repository_ctx):
    """Returns True if the host operating system is Windows."""
    host_os = repository_ctx.os.name.lower()
    return host_os.find("windows") != -1
def _execute(
        repository_ctx,
        cmdline,
        error_msg = None,
        error_details = None,
        empty_stdout_fine = False):
    """Executes an arbitrary shell command, aborting configuration on error.

    Args:
      repository_ctx: the repository_ctx object
      cmdline: list of strings, the command to execute
      error_msg: string, a summary of the error if the command fails
      error_details: string, details about the error or steps to fix it
      empty_stdout_fine: bool, if True, an empty stdout result is fine,
        otherwise it's an error

    Returns:
      the result of repository_ctx.execute(cmdline)
    """
    result = repository_ctx.execute(cmdline)
    succeeded = not result.stderr and (empty_stdout_fine or result.stdout)
    if not succeeded:
        summary = error_msg.strip() if error_msg else "Repository command failed"
        _fail("\n".join([
            summary,
            result.stderr.strip(),
            error_details or "",
        ]))
    return result
def _read_dir(repository_ctx, src_dir):
    """Returns the full paths of all files under src_dir, one per line.

    Traverses subdirectories and follows symlinks.  On Windows the listing
    comes from "dir /b /s /a-d" and backslashes are normalized to forward
    slashes so the result can be used in genrule outs.
    """
    if not _is_windows(repository_ctx):
        listing = _execute(
            repository_ctx,
            ["find", src_dir, "-follow", "-type", "f"],
            empty_stdout_fine = True,
        )
        return listing.stdout

    windows_dir = src_dir.replace("/", "\\")
    listing = _execute(
        repository_ctx,
        ["cmd.exe", "/c", "dir", windows_dir, "/b", "/s", "/a-d"],
        empty_stdout_fine = True,
    )

    # Paths are later used in genrule.outs, which require forward slashes.
    return listing.stdout.replace("\\", "/")
def _genrule(src_dir, genrule_name, command, outs):
    """Returns the text of a genrule running `command` and producing `outs`.

    `src_dir` is accepted for symmetry with the caller but is not used.
    """
    pieces = [
        "genrule(",
        ' name = "{}",'.format(genrule_name),
        " outs = [",
        outs,
        " ],",
        ' cmd = """',
        command,
        ' """,',
        ")",
    ]
    return "\n".join(pieces) + "\n"
def _norm_path(path):
    """Normalizes separators to '/' and strips any trailing slashes."""
    forward = path.replace("\\", "/")
    return forward.rstrip("/")
def _symlink_genrule_for_dir(
        repository_ctx,
        src_dir,
        dest_dir,
        genrule_name,
        src_files = [],
        dest_files = []):
    """Returns a genrule to symlink (or copy if on Windows) a set of files.

    If src_dir is passed, files will be read from the given directory; otherwise
    we assume files are in src_files and dest_files
    """
    if src_dir != None:
        src_dir = _norm_path(src_dir)
        dest_dir = _norm_path(dest_dir)
        files = "\n".join(
            sorted(_read_dir(repository_ctx, src_dir).splitlines()),
        )

        # Create a list with the src_dir stripped to use for outputs.
        # NOTE(review): str.replace removes *every* occurrence of src_dir,
        # not only the leading one -- presumably paths never repeat src_dir;
        # confirm.
        dest_files = files.replace(src_dir, "").splitlines()
        src_files = files.splitlines()
    command = []
    outs = []
    for i in range(len(dest_files)):
        if dest_files[i] != "":
            # If we have only one file to link we do not want to use the
            # dest_dir, as $(@D) will include the full path to the file.
            dest = "$(@D)/{}{}".format(
                dest_dir if len(dest_files) != 1 else "",
                dest_files[i],
            )

            # Copy the headers to create a sandboxable setup.
            cmd = "cp -f"
            command.append('{} "{}" "{}"'.format(cmd, src_files[i], dest))
            outs.append(' "{}{}",'.format(dest_dir, dest_files[i]))
    genrule = _genrule(
        src_dir,
        genrule_name,
        " && ".join(command),
        "\n".join(outs),
    )
    return genrule
def _get_python_bin(repository_ctx):
    """Gets the python bin path.

    Resolution order: the PYTHON_BIN_PATH environment variable, then
    "python" found on PATH.  Aborts configuration otherwise.
    """
    from_env = repository_ctx.os.environ.get(_PYTHON_BIN_PATH)
    if from_env != None:
        return from_env
    from_path = repository_ctx.which("python")
    if from_path != None:
        return str(from_path)
    _fail(("Cannot find python in PATH, please make sure " +
           "python is installed and add its directory in PATH, " +
           "or --define {}='/something/else'.\nPATH={}").format(
        _PYTHON_BIN_PATH,
        repository_ctx.os.environ.get("PATH", ""),
    ))
def _get_bash_bin(repository_ctx):
    """Gets the bash bin path.

    Resolution order: the BAZEL_SH environment variable, then "bash" found
    on PATH.  Aborts configuration otherwise.
    """
    from_env = repository_ctx.os.environ.get(_BAZEL_SH)
    if from_env != None:
        return from_env
    from_path = repository_ctx.which("bash")
    if from_path != None:
        return str(from_path)
    _fail(("Cannot find bash in PATH, please make sure " +
           "bash is installed and add its directory in PATH, " +
           "or --define {}='/path/to/bash'.\nPATH={}").format(
        _BAZEL_SH,
        repository_ctx.os.environ.get("PATH", ""),
    ))
def _get_python_runtime_pair(repository_ctx, python_bin):
    """Builds a py_runtime_pair definition for the given interpreter.

    `repository_ctx` is accepted for symmetry with the other helpers but is
    not referenced.
    """
    lines = [
        "py_runtime_pair(",
        ' name = "py_runtime_pair",',
        " py2_runtime = None,",
        ' py3_runtime = ":py3_runtime",',
        ")",
        "",
        "py_runtime(",
        ' name = "py3_runtime",',
        ' interpreter_path = "{}",'.format(python_bin),
        ' python_version = "PY3",',
        ")",
    ]
    return "\n".join(lines) + "\n"
def _get_python_lib(repository_ctx, python_bin):
    """Gets the python lib path.

    Prefers the PYTHON_LIB_PATH environment variable; otherwise runs the
    interpreter to discover a library path.
    """
    python_lib = repository_ctx.os.environ.get(_PYTHON_LIB_PATH)
    if python_lib != None:
        return python_lib

    # NOTE(review): the embedded script below appears truncated by
    # extraction (it starts mid-expression); presumably it was a heredoc
    # ending at "END" that printed the first candidate library path --
    # confirm against upstream before relying on this.
    print_lib = ("<= 1:\n" +
                 " print(paths[0])\n" +
                 "END")
    cmd = "{} - {}".format(python_bin, print_lib)
    result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd])
    return result.stdout.strip("\n")
def _check_python_lib(repository_ctx, python_lib):
    """Aborts configuration unless python_lib is a traversable directory."""
    probe = 'test -d "{}" -a -x "{}"'.format(python_lib, python_lib)
    outcome = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", probe])
    if outcome.return_code == 1:
        _fail("Invalid python library path: {}".format(python_lib))
def _check_python_bin(repository_ctx, python_bin):
    """Aborts configuration unless python_bin is an executable file."""
    probe = '[[ -x "{}" ]] && [[ ! -d "{}" ]]'.format(python_bin, python_bin)
    outcome = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", probe])
    if outcome.return_code == 1:
        _fail(("--define {}='{}' is not executable. " +
               "Is it the python binary?").format(
            _PYTHON_BIN_PATH,
            python_bin,
        ))
def _get_python_include(repository_ctx, python_bin):
    """Gets the python include path by querying the interpreter.

    Uses distutils.sysconfig when available, falling back to sysconfig.
    """
    query = (
        "import importlib; " +
        "import importlib.util; " +
        "print(importlib.import_module('distutils.sysconfig').get_python_inc() " +
        "if importlib.util.find_spec('distutils.sysconfig') " +
        "else importlib.import_module('sysconfig').get_path('include'))"
    )
    result = _execute(
        repository_ctx,
        [python_bin, "-c", query],
        error_msg = "Problem getting python include path.",
        error_details = ("Is the Python binary path set up right? " +
                         "(See ./configure or {}.) " +
                         "Is distutils installed?").format(_PYTHON_BIN_PATH),
    )
    return result.stdout.splitlines()[0]
def _get_python_import_lib_name(repository_ctx, python_bin):
    """Gets Python import library name (pythonXY.lib) on Windows."""
    query = (
        "import sys; " +
        'print("python{}{}.lib".format(' +
        "sys.version_info.major, sys.version_info.minor))"
    )
    output = _execute(
        repository_ctx,
        [python_bin, "-c", query],
        error_msg = "Problem getting python import library.",
        error_details = ("Is the Python binary path set up right? " +
                         "(See ./configure or {}.) ").format(_PYTHON_BIN_PATH),
    )
    return output.stdout.splitlines()[0]
def _get_numpy_include(repository_ctx, python_bin):
    """Gets the numpy include path by querying the interpreter."""
    result = _execute(
        repository_ctx,
        [python_bin, "-c", "import numpy; print(numpy.get_include())"],
        error_msg = "Problem getting numpy include path.",
        error_details = "Is numpy installed?",
    )
    return result.stdout.splitlines()[0]
def _create_local_python_repository(repository_ctx):
    """Creates the repository containing files set up to build with Python.

    Locates and validates the interpreter, its headers, and numpy's headers,
    mirrors the headers into this repository via genrules, and instantiates
    the BUILD file template with the generated snippets.
    """
    python_bin = _get_python_bin(repository_ctx)
    _check_python_bin(repository_ctx, python_bin)
    python_runtime_pair = _get_python_runtime_pair(repository_ctx, python_bin)
    python_lib = _get_python_lib(repository_ctx, python_bin)
    _check_python_lib(repository_ctx, python_lib)
    python_include = _get_python_include(repository_ctx, python_bin)
    numpy_include = _get_numpy_include(repository_ctx, python_bin) + "/numpy"

    # Genrule mirroring the Python headers into the repository.
    python_include_rule = _symlink_genrule_for_dir(
        repository_ctx,
        python_include,
        "python_include",
        "python_include",
    )
    python_import_lib_genrule = ""

    # To build Python C/C++ extension on Windows, we need to link to python
    # import library pythonXY.lib
    # See https://docs.python.org/3/extending/windows.html
    if _is_windows(repository_ctx):
        python_include = _norm_path(python_include)
        python_import_lib_name = _get_python_import_lib_name(
            repository_ctx,
            python_bin,
        )
        # The import library lives in <prefix>/libs, a sibling of the
        # include directory.
        python_import_lib_src = "{}/libs/{}".format(
            python_include.rsplit("/", 1)[0],
            python_import_lib_name,
        )
        python_import_lib_genrule = _symlink_genrule_for_dir(
            repository_ctx,
            None,
            "",
            "python_import_lib",
            [python_import_lib_src],
            [python_import_lib_name],
        )

    # Genrule mirroring numpy's headers into the repository.
    numpy_include_rule = _symlink_genrule_for_dir(
        repository_ctx,
        numpy_include,
        "numpy_include/numpy",
        "numpy_include",
    )

    # Instantiate the BUILD file template with the generated snippets.
    _tpl(repository_ctx, "BUILD", {
        "%{PYTHON_RUNTIME_PAIR}": python_runtime_pair,
        "%{PYTHON_INCLUDE_GENRULE}": python_include_rule,
        "%{PYTHON_IMPORT_LIB_GENRULE}": python_import_lib_genrule,
        "%{NUMPY_INCLUDE_GENRULE}": numpy_include_rule,
    })
def _create_remote_python_repository(repository_ctx, remote_config_repo):
    """Creates pointers to a remotely configured repo set up to build with Python.
    """
    remote_build = Label(remote_config_repo + ":BUILD")
    repository_ctx.template("BUILD", remote_build, {})
def _python_autoconf_impl(repository_ctx):
    """Implementation of the python_autoconf repository rule.

    Delegates to the remote configuration when TF_PYTHON_CONFIG_REPO is set,
    and probes the local Python installation otherwise.
    """
    remote_repo = repository_ctx.os.environ.get(_TF_PYTHON_CONFIG_REPO)
    if remote_repo == None:
        _create_local_python_repository(repository_ctx)
    else:
        _create_remote_python_repository(repository_ctx, remote_repo)
# Repository rule that materializes a Python build configuration.
# Re-runs when any of the listed environment variables changes.
# Typical usage in WORKSPACE: python_configure(name = "local_config_python")
python_configure = repository_rule(
    implementation = _python_autoconf_impl,
    environ = [
        _BAZEL_SH,
        _PYTHON_BIN_PATH,
        _PYTHON_LIB_PATH,
        _TF_PYTHON_CONFIG_REPO,
    ],
)
"""Detects and configures the local Python.
Add the following to your WORKSPACE file:
```python
python_configure(name = "local_config_python")
```
Args:
name: A unique name for this workspace rule.
"""
================================================
FILE: python/riegeli/records/BUILD
================================================
load("@com_google_protobuf//bazel:proto_library.bzl", "proto_library")
load("@com_google_protobuf//bazel:py_proto_library.bzl", "py_proto_library")
load("@rules_python//python:defs.bzl", "py_library")
load("//python/riegeli:py_extension.bzl", "py_extension")

package(
    default_visibility = ["//python/riegeli:__subpackages__"],
    features = ["header_modules"],
)

licenses(["notice"])

# C++ extension module exposing Riegeli record reading to Python.
py_extension(
    name = "record_reader",
    srcs = ["record_reader.cc"],
    # Python modules imported from C++.
    data = [
        ":records_metadata_py_pb2",
        ":skipped_region",
        "@com_google_protobuf//:protobuf_python",
    ],
    # record_reader.cc has #define before #include to influence what the
    # included files provide.
    features = ["-use_header_modules"],
    deps = [
        ":record_position_cc",
        "//python/riegeli/base:utils",
        "//python/riegeli/bytes:python_reader",
        "//riegeli/base:arithmetic",
        "//riegeli/base:assert",
        "//riegeli/base:chain",
        "//riegeli/base:compare",
        "//riegeli/base:types",
        "//riegeli/chunk_encoding:field_projection",
        "//riegeli/records:record_position",
        "//riegeli/records:record_reader",
        "//riegeli/records:skipped_region",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings:string_view",
        "@rules_python//python/cc:current_py_cc_headers",
    ],
)

# C++ extension module exposing Riegeli record writing to Python.
py_extension(
    name = "record_writer",
    srcs = ["record_writer.cc"],
    # record_writer.cc has #define before #include to influence what the
    # included files provide.
    features = ["-use_header_modules"],
    deps = [
        ":record_position_cc",
        "//python/riegeli/base:utils",
        "//python/riegeli/bytes:python_writer",
        "//riegeli/base:assert",
        "//riegeli/base:chain",
        "//riegeli/base:types",
        "//riegeli/records:record_writer",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/status",
        "@com_google_absl//absl/strings:string_view",
        "@rules_python//python/cc:current_py_cc_headers",
    ],
)

# C++ extension module defining the RecordPosition Python type; its "_cc"
# library is a dependency of the reader and writer extensions above.
py_extension(
    name = "record_position",
    srcs = ["record_position.cc"],
    hdrs = ["record_position.h"],
    # record_position.cc has #define before #include to influence what the
    # included files provide.
    features = ["-use_header_modules"],
    deps = [
        "//python/riegeli/base:utils",
        "//riegeli/base:arithmetic",
        "//riegeli/base:types",
        "//riegeli/records:record_position",
        "@com_google_absl//absl/base:core_headers",
        "@com_google_absl//absl/hash",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/strings:string_view",
        "@rules_python//python/cc:current_py_cc_headers",
    ],
)

py_library(
    name = "skipped_region",
    srcs = ["skipped_region.py"],
)

# Metadata schema for Riegeli/records files, plus its Python bindings.
proto_library(
    name = "records_metadata_proto",
    srcs = ["records_metadata.proto"],
    deps = ["@com_google_protobuf//:descriptor_proto"],
)

py_proto_library(
    name = "records_metadata_py_pb2",
    deps = [":records_metadata_proto"],
)
================================================
FILE: python/riegeli/records/__init__.py
================================================
================================================
FILE: python/riegeli/records/examples/BUILD
================================================
load("@rules_python//python:defs.bzl", "py_binary")

package(features = ["header_modules"])

licenses(["notice"])

# Runnable example that writes and reads back a Riegeli/records file.
py_binary(
    name = "write_read_records",
    srcs = ["write_read_records.py"],
    deps = [
        "//python/riegeli",
        "//python/riegeli/records/tests:records_test_py_pb2",
    ],
)
================================================
FILE: python/riegeli/records/examples/__init__.py
================================================
================================================
FILE: python/riegeli/records/examples/write_read_records.py
================================================
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Simple example which writes and reads a Riegeli/records file."""
import io
import riegeli
from riegeli.records.tests import records_test_pb2
def sample_string(i, size):
  """Returns exactly `size` bytes built by repeating the pattern b"<i> "."""
  unit = f'{i} '.encode()
  repetitions = -(-size // len(unit))  # ceil(size / len(unit)), so len >= size.
  return (unit * repetitions)[:size]
def sample_message(i, size):
  """Returns a SimpleMessage with id `i` and a `size`-byte sample payload."""
  return records_test_pb2.SimpleMessage(id=i, payload=sample_string(i, size))
def write_records(filename):
  """Writes 100 sample SimpleMessages to `filename` as a transposed file.

  Record-type metadata is attached so readers can recover the schema.
  """
  print('Writing', filename)
  metadata = riegeli.RecordsMetadata()
  riegeli.set_record_type(metadata, records_test_pb2.SimpleMessage)
  dest = io.FileIO(filename, mode='wb')
  with riegeli.RecordWriter(
      dest, options='transpose', metadata=metadata
  ) as writer:
    messages = (sample_message(i, 100) for i in range(100))
    writer.write_messages(messages)
def read_records(filename):
  """Reads `filename` back, projecting only the `id` field, and prints ids."""
  print('Reading', filename)
  id_field_number = (
      records_test_pb2.SimpleMessage.DESCRIPTOR.fields_by_name['id'].number
  )
  source = io.FileIO(filename, mode='rb')
  with riegeli.RecordReader(
      source,
      field_projection=[[id_field_number]],
  ) as reader:
    ids = [
        str(record.id)
        for record in reader.read_messages(records_test_pb2.SimpleMessage)
    ]
    print(' '.join(ids))
def main():
  """Round-trips sample records through a file in /tmp."""
  path = '/tmp/riegeli_example'
  write_records(path)
  read_records(path)


if __name__ == '__main__':
  main()
================================================
FILE: python/riegeli/records/record_position.cc
================================================
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// From https://docs.python.org/3/c-api/intro.html:
// Since Python may define some pre-processor definitions which affect the
// standard headers on some systems, you must include Python.h before any
// standard headers are included.
#define PY_SSIZE_T_CLEAN
#include
// clang-format: do not reorder the above include.
#include "python/riegeli/records/record_position.h"
// clang-format: do not reorder the above include.
#include
#include
#include
#include
#include
#include "absl/base/optimization.h"
#include "absl/hash/hash.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "python/riegeli/base/utils.h"
#include "riegeli/base/arithmetic.h"
#include "riegeli/base/types.h"
#include "riegeli/records/record_position.h"
namespace riegeli::python {
namespace {
// Instance layout of the Python `RecordPosition` type: the standard CPython
// object header followed by the wrapped C++ position.
struct PyRecordPositionObject {
  // clang-format off
  PyObject_HEAD
  static_assert(true, "");  // clang-format workaround.
  // clang-format on
  // NOTE(review): template argument appears stripped by extraction;
  // presumably `PythonWrapped<FutureRecordPosition>` -- confirm upstream.
  PythonWrapped record_position;
};
extern PyTypeObject PyRecordPosition_Type;
// `extern "C"` sets the C calling convention for compatibility with the Python
// API. `static` avoids making symbols public, as `extern "C"` trumps anonymous
// namespace.
extern "C" {
// `tp_dealloc`: resets the wrapped position inside `PythonUnlocked()`
// (which presumably releases the GIL while the C++ destructor runs), then
// frees the Python object via the type's `tp_free`.
static void RecordPositionDestructor(PyRecordPositionObject* self) {
  PythonUnlocked([&] { self->record_position.reset(); });
  Py_TYPE(self)->tp_free(self);
}
// `tp_new`: RecordPosition(chunk_begin, record_index).
//
// Parses the two integer arguments, rejects combinations whose sum would
// overflow the underlying position type (raising OverflowError), and
// constructs the wrapped position.
// NOTE(review): several template arguments below appear stripped by
// extraction (e.g. `std::optional<Position>`, `const_cast<char**>`,
// `std::numeric_limits<...>`, `IntCast<...>`) -- confirm upstream.
static PyRecordPositionObject* RecordPositionNew(PyTypeObject* cls,
                                                 PyObject* args,
                                                 PyObject* kwargs) {
  static constexpr const char* keywords[] = {"chunk_begin", "record_index",
                                             nullptr};
  PyObject* chunk_begin_arg;
  PyObject* record_index_arg;
  if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords(
          args, kwargs, "OO:RecordPosition", const_cast(keywords),
          &chunk_begin_arg, &record_index_arg))) {
    return nullptr;
  }
  const std::optional chunk_begin =
      PositionFromPython(chunk_begin_arg);
  if (ABSL_PREDICT_FALSE(chunk_begin == std::nullopt)) return nullptr;
  const std::optional record_index =
      PositionFromPython(record_index_arg);
  if (ABSL_PREDICT_FALSE(record_index == std::nullopt)) return nullptr;
  // Reject values whose sum `chunk_begin + record_index` would overflow.
  if (ABSL_PREDICT_FALSE(*chunk_begin > std::numeric_limits::max()) ||
      ABSL_PREDICT_FALSE(*record_index >
                         std::numeric_limits::max() - *chunk_begin)) {
    PyErr_Format(PyExc_OverflowError, "RecordPosition overflow: %llu/%llu",
                 static_cast(*chunk_begin),
                 static_cast(*record_index));
    return nullptr;
  }
  std::unique_ptr self(
      reinterpret_cast(cls->tp_alloc(cls, 0)));
  if (ABSL_PREDICT_FALSE(self == nullptr)) return nullptr;
  self->record_position.emplace(RecordPosition(
      IntCast(*chunk_begin), IntCast(*record_index)));
  return self.release();
}
// Getter for the `chunk_begin` property: file position of the chunk
// containing the record.  Resolves the wrapped position in PythonUnlocked().
static PyObject* RecordPositionChunkBegin(PyRecordPositionObject* self,
                                          void* closure) {
  const RecordPosition pos =
      PythonUnlocked([&] { return self->record_position->get(); });
  return PositionToPython(pos.chunk_begin()).release();
}
// Getter for the `record_index` property: index of the record within its
// chunk.
static PyObject* RecordPositionRecordIndex(PyRecordPositionObject* self,
                                           void* closure) {
  const RecordPosition pos =
      PythonUnlocked([&] { return self->record_position->get(); });
  return PositionToPython(pos.record_index()).release();
}
// Getter for the `numeric` property: the position collapsed to a single
// integer (see the type docstring for its ordering guarantees).
static PyObject* RecordPositionNumeric(PyRecordPositionObject* self,
                                       void* closure) {
  const RecordPosition pos =
      PythonUnlocked([&] { return self->record_position->get(); });
  return PositionToPython(pos.numeric()).release();
}
// `tp_richcompare`: implements ==, !=, <, >, <=, >= by comparing the two
// wrapped positions.  Returns `Py_NotImplemented` when either operand is
// not a RecordPosition so Python can try the reflected operation.
static PyObject* RecordPositionCompare(PyObject* a, PyObject* b, int op) {
  if (ABSL_PREDICT_FALSE(!PyObject_TypeCheck(a, &PyRecordPosition_Type)) ||
      ABSL_PREDICT_FALSE(!PyObject_TypeCheck(b, &PyRecordPosition_Type))) {
    Py_INCREF(Py_NotImplemented);
    return Py_NotImplemented;
  }
  RecordPosition a_pos;
  RecordPosition b_pos;
  // Resolve both positions with PythonUnlocked() (get() may presumably
  // block on a pending write).  NOTE(review): cast targets appear stripped
  // by extraction; presumably `reinterpret_cast<PyRecordPositionObject*>`.
  PythonUnlocked([&] {
    a_pos =
        reinterpret_cast(a)->record_position->get();
    b_pos =
        reinterpret_cast(b)->record_position->get();
  });
  switch (op) {
    case Py_EQ:
      return PyBool_FromLong(a_pos == b_pos);
    case Py_NE:
      return PyBool_FromLong(a_pos != b_pos);
    case Py_LT:
      return PyBool_FromLong(a_pos < b_pos);
    case Py_GT:
      return PyBool_FromLong(a_pos > b_pos);
    case Py_LE:
      return PyBool_FromLong(a_pos <= b_pos);
    case Py_GE:
      return PyBool_FromLong(a_pos >= b_pos);
    default:
      Py_INCREF(Py_NotImplemented);
      return Py_NotImplemented;
  }
}
// `tp_hash`: hashes the wrapped position with `absl::Hash`.  CPython
// reserves -1 as the tp_hash error indicator, so that value is remapped
// to -2.
static Py_hash_t RecordPositionHash(PyRecordPositionObject* self) {
  const RecordPosition pos =
      PythonUnlocked([&] { return self->record_position->get(); });
  Py_hash_t hash = static_cast(absl::Hash()(pos));
  if (ABSL_PREDICT_FALSE(hash == -1)) hash = -2;
  return hash;
}
// `tp_str`: the text representation of the position (whatever
// `RecordPosition::ToString()` produces; parsed back by `from_str`).
static PyObject* RecordPositionStr(PyRecordPositionObject* self) {
  const RecordPosition pos =
      PythonUnlocked([&] { return self->record_position->get(); });
  return StringToPython(pos.ToString()).release();
}
// `from_str` classmethod: parses a RecordPosition from its text format and
// raises ValueError on parse failure.
// NOTE(review): `const_cast(keywords)` appears stripped by extraction;
// presumably `const_cast<char**>(keywords)` -- confirm upstream.
static PyRecordPositionObject* RecordPositionFromStr(PyTypeObject* cls,
                                                     PyObject* args,
                                                     PyObject* kwargs) {
  static constexpr const char* keywords[] = {"serialized", nullptr};
  PyObject* serialized_arg;
  if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords(
          args, kwargs, "O:from_str", const_cast(keywords),
          &serialized_arg))) {
    return nullptr;
  }
  StrOrBytes serialized;
  if (ABSL_PREDICT_FALSE(!serialized.FromPython(serialized_arg))) {
    return nullptr;
  }
  RecordPosition pos;
  if (ABSL_PREDICT_FALSE(!pos.FromString(serialized))) {
    PyErr_SetString(PyExc_ValueError, "RecordPosition.from_str() failed");
    return nullptr;
  }
  // Allocate through `cls` so subclasses get instances of their own type.
  std::unique_ptr self(
      reinterpret_cast(cls->tp_alloc(cls, 0)));
  if (ABSL_PREDICT_FALSE(self == nullptr)) return nullptr;
  self->record_position.emplace(pos);
  return self.release();
}
// `tp_repr`: "RecordPosition(<chunk_begin>, <record_index>)", mirroring the
// constructor call that would recreate the value.
static PyObject* RecordPositionRepr(PyRecordPositionObject* self) {
  const RecordPosition pos =
      PythonUnlocked([&] { return self->record_position->get(); });
  return StringToPython(absl::StrCat("RecordPosition(", pos.chunk_begin(), ", ",
                                     pos.record_index(), ")"))
      .release();
}
// `to_bytes` method: the binary serialization of the position (parsed back
// by `from_bytes`).
static PyObject* RecordPositionToBytes(PyRecordPositionObject* self,
                                       PyObject* args) {
  const RecordPosition pos =
      PythonUnlocked([&] { return self->record_position->get(); });
  return BytesToPython(pos.ToBytes()).release();
}
// `from_bytes` classmethod: parses a RecordPosition from its binary format
// and raises ValueError on parse failure.
// NOTE(review): `const_cast(keywords)` appears stripped by extraction;
// presumably `const_cast<char**>(keywords)` -- confirm upstream.
static PyRecordPositionObject* RecordPositionFromBytes(PyTypeObject* cls,
                                                       PyObject* args,
                                                       PyObject* kwargs) {
  static constexpr const char* keywords[] = {"serialized", nullptr};
  PyObject* serialized_arg;
  if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords(
          args, kwargs, "O:from_bytes", const_cast(keywords),
          &serialized_arg))) {
    return nullptr;
  }
  BytesLike serialized;
  if (ABSL_PREDICT_FALSE(!serialized.FromPython(serialized_arg))) {
    return nullptr;
  }
  RecordPosition pos;
  if (ABSL_PREDICT_FALSE(!pos.FromBytes(serialized))) {
    PyErr_SetString(PyExc_ValueError, "RecordPosition.from_bytes() failed");
    return nullptr;
  }
  // Allocate through `cls` so subclasses get instances of their own type.
  std::unique_ptr self(
      reinterpret_cast(cls->tp_alloc(cls, 0)));
  if (ABSL_PREDICT_FALSE(self == nullptr)) return nullptr;
  self->record_position.emplace(pos);
  return self.release();
}
} // extern "C"
// Method table for the `RecordPosition` type.  Each entry's raw string is
// exposed as the Python-level `__doc__` of the method.
// NOTE(review): `reinterpret_cast(...)` targets appear stripped by
// extraction; presumably `reinterpret_cast<PyCFunction>` -- confirm.
const PyMethodDef RecordPositionMethods[] = {
    {"from_str", reinterpret_cast(RecordPositionFromStr),
     METH_VARARGS | METH_KEYWORDS | METH_CLASS,
     R"doc(
from_str(type, serialized: str | bytes) -> RecordPosition
Parses RecordPosition from its text format.
Args:
serialized: Text string to parse.
)doc"},
    {"to_bytes", reinterpret_cast(RecordPositionToBytes),
     METH_NOARGS,
     R"doc(
to_bytes(self) -> bytes
Returns the RecordPosition serialized to its binary format.
Serialized byte strings have the same natural order as the corresponding
positions.
)doc"},
    {"from_bytes", reinterpret_cast(RecordPositionFromBytes),
     METH_VARARGS | METH_KEYWORDS | METH_CLASS, R"doc(
from_bytes(
type, serialized: bytes | bytearray | memoryview) -> RecordPosition
Parses RecordPosition from its binary format.
Serialized byte strings have the same natural order as the corresponding
positions.
Args:
serialized: Byte string to parse.
)doc"},
    // Sentinel terminating the table.
    {nullptr, nullptr, 0, nullptr},
};
// Read-only property table for the `RecordPosition` type; all setters are
// nullptr, so the properties cannot be assigned from Python.
const PyGetSetDef RecordPositionGetSet[] = {
    {const_cast("chunk_begin"),
     reinterpret_cast(RecordPositionChunkBegin), nullptr,
     const_cast(R"doc(
chunk_begin: int
File position of the beginning of the chunk containing the given record.
)doc"),
     nullptr},
    {const_cast("record_index"),
     reinterpret_cast(RecordPositionRecordIndex), nullptr,
     const_cast(R"doc(
record_index: int
Index of the record within the chunk.
)doc"),
     nullptr},
    {const_cast("numeric"),
     reinterpret_cast(RecordPositionNumeric), nullptr,
     const_cast(R"doc(
numeric: int
Converts RecordPosition to an integer scaled between 0 and file size.
Distinct RecordPositions of a valid file have distinct numeric values.
)doc"),
     nullptr},
    // Sentinel terminating the table.
    {nullptr, nullptr, nullptr, nullptr, nullptr}};
// Python type object for `RecordPosition`.  Slots are listed positionally
// in PyTypeObject declaration order; the trailing comments name each slot.
PyTypeObject PyRecordPosition_Type = {
    // clang-format off
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    // clang-format on
    "riegeli.records.record_position.RecordPosition",  // tp_name
    sizeof(PyRecordPositionObject),                    // tp_basicsize
    0,                                                 // tp_itemsize
    reinterpret_cast(RecordPositionDestructor),  // tp_dealloc
#if PY_VERSION_HEX >= 0x03080000
    0,        // tp_vectorcall_offset
#else
    nullptr,  // tp_print
#endif
    nullptr,  // tp_getattr
    nullptr,  // tp_setattr
    nullptr,  // tp_as_async
    reinterpret_cast(RecordPositionRepr),  // tp_repr
    nullptr,  // tp_as_number
    nullptr,  // tp_as_sequence
    nullptr,  // tp_as_mapping
    reinterpret_cast(RecordPositionHash),  // tp_hash
    nullptr,  // tp_call
    reinterpret_cast(RecordPositionStr),  // tp_str
    nullptr,  // tp_getattro
    nullptr,  // tp_setattro
    nullptr,  // tp_as_buffer
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  // tp_flags
    R"doc(
RecordPosition(chunk_begin: int, record_index: int) -> RecordPosition
Represents a position in a Riegeli/records file.
There are two ways of expressing positions, both strictly monotonic:
* RecordPosition - Faster for seeking.
* int - Scaled between 0 and file size.
RecordPosition can be converted to int by the numeric property.
Working with RecordPosition is recommended, unless it is needed to seek to an
approximate position interpolated along the file, e.g. for splitting the file
into shards, or unless the position must be expressed as an integer from the
range [0, file_size] in order to fit into a preexisting API.
Both RecordReader and RecordWriter return positions. A position from
RecordWriter can act as a future: accessing its contents for the first time
might block, waiting for pending operations to complete.
)doc",    // tp_doc
    nullptr,  // tp_traverse
    nullptr,  // tp_clear
    RecordPositionCompare,  // tp_richcompare
    0,        // tp_weaklistoffset
    nullptr,  // tp_iter
    nullptr,  // tp_iternext
    const_cast(RecordPositionMethods),  // tp_methods
    nullptr,  // tp_members
    const_cast(RecordPositionGetSet),  // tp_getset
    nullptr,  // tp_base
    nullptr,  // tp_dict
    nullptr,  // tp_descr_get
    nullptr,  // tp_descr_set
    0,        // tp_dictoffset
    nullptr,  // tp_init
    nullptr,  // tp_alloc
    reinterpret_cast(RecordPositionNew),  // tp_new
    nullptr,  // tp_free
    nullptr,  // tp_is_gc
    nullptr,  // tp_bases
    nullptr,  // tp_mro
    nullptr,  // tp_cache
    nullptr,  // tp_subclasses
    nullptr,  // tp_weaklist
    nullptr,  // tp_del
    0,        // tp_version_tag
    nullptr,  // tp_finalize
};
// Wraps a C++ `FutureRecordPosition` into a new Python RecordPosition
// object; returns nullptr with a Python error set on allocation failure.
// Exported through the capsule API below.
PythonPtr RecordPositionToPython(FutureRecordPosition value) {
  PythonPtr self(PyRecordPosition_Type.tp_alloc(&PyRecordPosition_Type, 0));
  if (ABSL_PREDICT_FALSE(self == nullptr)) return nullptr;
  reinterpret_cast(self.get())
      ->record_position.emplace(std::move(value));
  return self;
}
// Extracts the C++ position from a Python RecordPosition object; raises
// TypeError and returns nullopt when `object` has the wrong type.
// Exported through the capsule API below.
// NOTE(review): template arguments appear stripped by extraction
// (presumably `std::optional<RecordPosition>` and
// `reinterpret_cast<PyRecordPositionObject*>`) -- confirm upstream.
std::optional RecordPositionFromPython(PyObject* object) {
  if (ABSL_PREDICT_FALSE(!PyObject_TypeCheck(object, &PyRecordPosition_Type))) {
    PyErr_Format(PyExc_TypeError, "Expected RecordPosition, not %s",
                 Py_TYPE(object)->tp_name);
    return std::nullopt;
  }
  return PythonUnlocked([&] {
    return reinterpret_cast(object)
        ->record_position->get();
  });
}
// Fully qualified module name and docstring for the module definition.
const char* const kModuleName = "riegeli.records.record_position";
const char kModuleDoc[] =
    R"doc(Represents a position in a Riegeli/records file.)doc";

// Module definition: no module-level methods; all functionality is exposed
// through the `RecordPosition` type and the exported capsule.
PyModuleDef kModuleDef = {
    PyModuleDef_HEAD_INIT,
    kModuleName,  // m_name
    kModuleDoc,   // m_doc
    -1,           // m_size
    nullptr,      // m_methods
    nullptr,      // m_slots
    nullptr,      // m_traverse
    nullptr,      // m_clear
    nullptr,      // m_free
};
// Creates the `riegeli.records.record_position` module object.
//
// Readies the `RecordPosition` type, inserts it into the module, and exports
// the C++ conversion functions through a capsule so other extension modules
// can use them without Python-level calls.  Returns nullptr with a Python
// error set on failure.
PyObject* InitModule() {
  if (ABSL_PREDICT_FALSE(PyType_Ready(&PyRecordPosition_Type) < 0)) {
    return nullptr;
  }
  PythonPtr module(PyModule_Create(&kModuleDef));
  if (ABSL_PREDICT_FALSE(module == nullptr)) return nullptr;
  // On success `PyModule_AddObject()` steals this reference.
  Py_INCREF(&PyRecordPosition_Type);
  if (ABSL_PREDICT_FALSE(PyModule_AddObject(module.get(), "RecordPosition",
                                            reinterpret_cast<PyObject*>(
                                                &PyRecordPosition_Type)) < 0)) {
    // `PyModule_AddObject()` does not steal the reference on failure, so it
    // must be dropped here to avoid leaking the type.
    Py_DECREF(&PyRecordPosition_Type);
    return nullptr;
  }
  static constexpr RecordPositionApi kRecordPositionApi = {
      RecordPositionToPython,
      RecordPositionFromPython,
  };
  if (ABSL_PREDICT_FALSE(!ExportCapsule(
          module.get(), kRecordPositionCapsuleName, &kRecordPositionApi))) {
    return nullptr;
  }
  return module.release();
}
} // namespace
PyMODINIT_FUNC PyInit_record_position() { return InitModule(); }
} // namespace riegeli::python
================================================
FILE: python/riegeli/records/record_position.h
================================================
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PYTHON_RIEGELI_RECORDS_RECORD_POSITION_H_
#define PYTHON_RIEGELI_RECORDS_RECORD_POSITION_H_
// From https://docs.python.org/3/c-api/intro.html:
// Since Python may define some pre-processor definitions which affect the
// standard headers on some systems, you must include Python.h before any
// standard headers are included.
#include
// clang-format: do not reorder the above include.
#include
#include "python/riegeli/base/utils.h"
#include "riegeli/records/record_position.h"
namespace riegeli::python {

// Access the API thus:
// ```
// static constexpr ImportedCapsule kRecordPositionApi(
// kRecordPositionCapsuleName);
// ```
//
// Table of function pointers exported through a PyCapsule so that other
// extension modules can convert RecordPositions without importing
// Python-level symbols.
// NOTE(review): template arguments appear stripped by extraction
// (presumably `PythonPtr` is fine but the optional was
// `std::optional<RecordPosition>`) -- confirm upstream.
struct RecordPositionApi {
  PythonPtr (*RecordPositionToPython)(FutureRecordPosition value);
  std::optional (*RecordPositionFromPython)(PyObject* object);
};

// Name under which the capsule is registered by the record_position module.
inline constexpr const char* kRecordPositionCapsuleName =
    "riegeli.records.record_position._CPPAPI";

}  // namespace riegeli::python
#endif // PYTHON_RIEGELI_RECORDS_RECORD_POSITION_H_
================================================
FILE: python/riegeli/records/record_reader.cc
================================================
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// From https://docs.python.org/3/c-api/intro.html:
// Since Python may define some pre-processor definitions which affect the
// standard headers on some systems, you must include Python.h before any
// standard headers are included.
#define PY_SSIZE_T_CLEAN
#include <Python.h>
// clang-format: do not reorder the above include.
#include <stddef.h>
#include <functional>
#include <memory>
#include <optional>
#include <utility>
#include "absl/base/optimization.h"
#include "absl/status/status.h"
#include "absl/strings/string_view.h"
#include "python/riegeli/base/utils.h"
#include "python/riegeli/bytes/python_reader.h"
#include "python/riegeli/records/record_position.h"
#include "riegeli/base/arithmetic.h"
#include "riegeli/base/assert.h"
#include "riegeli/base/chain.h"
#include "riegeli/base/compare.h"
#include "riegeli/base/types.h"
#include "riegeli/chunk_encoding/field_projection.h"
#include "riegeli/records/record_position.h"
#include "riegeli/records/record_reader.h"
#include "riegeli/records/skipped_region.h"
namespace riegeli::python {
namespace {
// C++ API of the `record_position` module, imported from the capsule that
// `python/riegeli/records/record_position.cc` exports.
constexpr ImportedCapsule<RecordPositionApi> kRecordPositionApi(
    kRecordPositionCapsuleName);
// `extern "C"` sets the C calling convention for compatibility with the Python
// API. `static` avoids making symbols public, as `extern "C"` trumps anonymous
// namespace.
extern "C" {
// get_record_type(metadata) -> message class | None
//
// Builds a Python message class for the record type described by
// `metadata.record_type_name` and `metadata.file_descriptor`, or returns
// `None` if the metadata does not describe a record type.
static PyObject* GetRecordType(PyObject* self, PyObject* args,
                               PyObject* kwargs) {
  static constexpr const char* keywords[] = {"metadata", nullptr};
  PyObject* metadata_arg;
  if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords(
          args, kwargs, "O:get_record_type", const_cast<char**>(keywords),
          &metadata_arg))) {
    return nullptr;
  }
  // record_type_name = metadata.record_type_name
  static constexpr Identifier id_record_type_name("record_type_name");
  const PythonPtr record_type_name(
      PyObject_GetAttr(metadata_arg, id_record_type_name.get()));
  if (ABSL_PREDICT_FALSE(record_type_name == nullptr)) return nullptr;
  // if not record_type_name: return None
  const int record_type_name_is_true = PyObject_IsTrue(record_type_name.get());
  if (ABSL_PREDICT_FALSE(record_type_name_is_true < 0)) return nullptr;
  if (record_type_name_is_true == 0) Py_RETURN_NONE;
  // file_descriptors = metadata.file_descriptor
  static constexpr Identifier id_file_descriptor("file_descriptor");
  const PythonPtr file_descriptors(
      PyObject_GetAttr(metadata_arg, id_file_descriptor.get()));
  if (ABSL_PREDICT_FALSE(file_descriptors == nullptr)) return nullptr;
  // if not file_descriptors: return None
  const int file_descriptors_is_true = PyObject_IsTrue(file_descriptors.get());
  if (ABSL_PREDICT_FALSE(file_descriptors_is_true < 0)) return nullptr;
  if (file_descriptors_is_true == 0) Py_RETURN_NONE;
  // pool = DescriptorPool()
  static constexpr ImportedConstant kDescriptorPool(
      "google.protobuf.descriptor_pool", "DescriptorPool");
  if (ABSL_PREDICT_FALSE(!kDescriptorPool.Verify())) return nullptr;
  const PythonPtr pool(
      PyObject_CallFunctionObjArgs(kDescriptorPool.get(), nullptr));
  if (ABSL_PREDICT_FALSE(pool == nullptr)) return nullptr;
  // for file_descriptor in file_descriptors:
  //   pool.Add(file_descriptor)
  const PythonPtr iter(PyObject_GetIter(file_descriptors.get()));
  if (ABSL_PREDICT_FALSE(iter == nullptr)) return nullptr;
  while (const PythonPtr file_descriptor{PyIter_Next(iter.get())}) {
    static constexpr Identifier id_Add("Add");
    const PythonPtr add_result(PyObject_CallMethodObjArgs(
        pool.get(), id_Add.get(), file_descriptor.get(), nullptr));
    if (ABSL_PREDICT_FALSE(add_result == nullptr)) return nullptr;
  }
  // `PyIter_Next()` returns `nullptr` both at the end and on error;
  // distinguish the two cases.
  if (ABSL_PREDICT_FALSE(PyErr_Occurred() != nullptr)) return nullptr;
  // message_descriptor = pool.FindMessageTypeByName(record_type_name)
  static constexpr Identifier id_FindMessageTypeByName("FindMessageTypeByName");
  const PythonPtr message_descriptor(
      PyObject_CallMethodObjArgs(pool.get(), id_FindMessageTypeByName.get(),
                                 record_type_name.get(), nullptr));
  if (ABSL_PREDICT_FALSE(message_descriptor == nullptr)) return nullptr;
  // return GetMessageClass(message_descriptor)
  const PythonPtr message_factory(
      PyImport_ImportModule("google.protobuf.message_factory"));
  if (ABSL_PREDICT_FALSE(message_factory == nullptr)) return nullptr;
  static constexpr Identifier id_GetMessageClass("GetMessageClass");
  return PyObject_CallMethodObjArgs(message_factory.get(),
                                    id_GetMessageClass.get(),
                                    message_descriptor.get(), nullptr);
}
} // extern "C"
// Python object layout backing `riegeli.records.record_reader.RecordReader`.
struct PyRecordReaderObject {
  // clang-format off
  PyObject_HEAD
  static_assert(true, "");  // clang-format workaround.
  // clang-format on
  // The wrapped C++ reader; absent before initialization and after clearing.
  PythonWrapped<RecordReader<PythonReader>> record_reader;
  // Owned reference to the user-supplied recovery callable, or `nullptr`.
  PyObject* recovery;
  // Python exception raised inside the recovery callback, saved so a later
  // call can re-raise it.
  PythonWrapped<Exception> recovery_exception;
};

extern PyTypeObject PyRecordReader_Type;
// Python object layout for the iterator returned by `read_records()` and
// `read_messages()`.
struct PyRecordIterObject {
  // clang-format off
  PyObject_HEAD
  static_assert(true, "");  // clang-format workaround.
  // clang-format on
  // Reads one record; returns a new reference, `None` at end of file, or
  // `nullptr` on error.
  PyObject* (*read_record)(PyRecordReaderObject* self, PyObject* args);
  // Owned reference to the reader being iterated over.
  PyRecordReaderObject* record_reader;
  // Owned reference to extra arguments for `read_record`, or `nullptr`.
  PyObject* args;
};

extern PyTypeObject PyRecordIter_Type;
// Returns `true` if a Python exception should be reported for this reader:
// either the recovery callback raised, or the reader itself failed.
bool RecordReaderHasException(PyRecordReaderObject* self) {
  return self->recovery_exception.has_value() || !self->record_reader->ok();
}
// Raises the Python exception corresponding to the reader's failure:
// prefers an exception saved by the recovery callback, then an exception
// propagated from the underlying Python source, and finally translates the
// reader's status into a `RiegeliError`.
void SetExceptionFromRecordReader(PyRecordReaderObject* self) {
  if (self->recovery_exception.has_value()) {
    self->recovery_exception->Restore();
    return;
  }
  RIEGELI_ASSERT(!self->record_reader->ok())
      << "Failed precondition of SetExceptionFromRecordReader(): "
         "RecordReader OK";
  if (!self->record_reader->src().exception().ok()) {
    self->record_reader->src().exception().Restore();
    return;
  }
  SetRiegeliError(self->record_reader->status());
}
std::optional VerifyFieldNumber(long field_number_value) {
static_assert(Field::kExistenceOnly == 0,
"VerifyFieldNumber() assumes that Field::kExistenceOnly == 0");
if (ABSL_PREDICT_FALSE(field_number_value < Field::kExistenceOnly ||
field_number_value > (1 << 29) - 1)) {
PyErr_Format(PyExc_OverflowError, "Field number out of range: %ld",
field_number_value);
return std::nullopt;
}
return IntCast(field_number_value);
}
// Converts a Python `int` to a verified field number; sets `TypeError` or
// `OverflowError` and returns `std::nullopt` on failure.
std::optional<int> FieldNumberFromPython(PyObject* object) {
  if (ABSL_PREDICT_FALSE(!PyLong_Check(object))) {
    PyErr_Format(PyExc_TypeError, "Expected int, not %s",
                 Py_TYPE(object)->tp_name);
    return std::nullopt;
  }
  const long field_number_value = PyLong_AsLong(object);
  // `PyLong_AsLong()` returns -1 both as a legitimate value and on error.
  if (ABSL_PREDICT_FALSE(field_number_value == -1) && PyErr_Occurred()) {
    return std::nullopt;
  }
  return VerifyFieldNumber(field_number_value);
}
std::optional FieldProjectionFromPython(PyObject* object) {
FieldProjection field_projection;
const PythonPtr field_iter(PyObject_GetIter(object));
if (ABSL_PREDICT_FALSE(field_iter == nullptr)) return std::nullopt;
while (const PythonPtr field_object{PyIter_Next(field_iter.get())}) {
Field field;
const PythonPtr field_number_iter(PyObject_GetIter(field_object.get()));
if (ABSL_PREDICT_FALSE(field_number_iter == nullptr)) return std::nullopt;
while (const PythonPtr field_number_object{
PyIter_Next(field_number_iter.get())}) {
const std::optional field_number =
FieldNumberFromPython(field_number_object.get());
if (ABSL_PREDICT_FALSE(field_number == std::nullopt)) return std::nullopt;
field.AddFieldNumber(*field_number);
}
if (ABSL_PREDICT_FALSE(PyErr_Occurred() != nullptr)) return std::nullopt;
field_projection.AddField(std::move(field));
}
if (ABSL_PREDICT_FALSE(PyErr_Occurred() != nullptr)) return std::nullopt;
return field_projection;
}
// `extern "C"` sets the C calling convention for compatibility with the Python
// API. `static` avoids making symbols public, as `extern "C"` trumps anonymous
// namespace.
extern "C" {
// tp_dealloc: destroys the C++ reader outside the Python lock (via
// `PythonUnlocked`, since destruction may close the file), then releases the
// remaining Python references.
static void RecordReaderDestructor(PyRecordReaderObject* self) {
  PyObject_GC_UnTrack(self);
#if PY_VERSION_HEX < 0x030D0000  // < 3.13
  Py_TRASHCAN_BEGIN(self, RecordReaderDestructor);
#endif
  PythonUnlocked([&] { self->record_reader.reset(); });
  Py_XDECREF(self->recovery);
  self->recovery_exception.reset();
  Py_TYPE(self)->tp_free(self);
#if PY_VERSION_HEX < 0x030D0000  // < 3.13
  Py_TRASHCAN_END;
#endif
}
// tp_traverse: visits Python objects reachable from this reader for the
// cyclic garbage collector.
static int RecordReaderTraverse(PyRecordReaderObject* self, visitproc visit,
                                void* arg) {
  Py_VISIT(self->recovery);
  if (self->recovery_exception.has_value()) {
    const int recovery_exception_result =
        self->recovery_exception->Traverse(visit, arg);
    if (ABSL_PREDICT_FALSE(recovery_exception_result != 0)) {
      return recovery_exception_result;
    }
  }
  if (self->record_reader.has_value()) {
    return self->record_reader->src().Traverse(visit, arg);
  }
  return 0;
}
// tp_clear: drops references that may participate in reference cycles.
// The C++ reader is destroyed outside the Python lock.
static int RecordReaderClear(PyRecordReaderObject* self) {
  PythonUnlocked([&] { self->record_reader.reset(); });
  Py_CLEAR(self->recovery);
  self->recovery_exception.reset();
  return 0;
}
// __init__(self, src, *, owns_src=True, assumed_pos=None,
//          min_buffer_size=None, max_buffer_size=None, buffer_size=None,
//          field_projection=None, recovery=None)
//
// Builds the `PythonReader` and `RecordReader` options from keyword
// arguments, installs an optional recovery callback, and constructs the
// reader outside the Python lock. Returns 0 on success, -1 with a Python
// exception set on failure.
static int RecordReaderInit(PyRecordReaderObject* self, PyObject* args,
                            PyObject* kwargs) {
  static constexpr const char* keywords[] = {"src",
                                             "owns_src",
                                             "assumed_pos",
                                             "min_buffer_size",
                                             "max_buffer_size",
                                             "buffer_size",
                                             "field_projection",
                                             "recovery",
                                             nullptr};
  PyObject* src_arg;
  PyObject* owns_src_arg = nullptr;
  PyObject* assumed_pos_arg = nullptr;
  PyObject* min_buffer_size_arg = nullptr;
  PyObject* max_buffer_size_arg = nullptr;
  PyObject* buffer_size_arg = nullptr;
  PyObject* field_projection_arg = nullptr;
  PyObject* recovery_arg = nullptr;
  if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords(
          args, kwargs, "O|$OOOOOOO:RecordReader", const_cast<char**>(keywords),
          &src_arg, &owns_src_arg, &assumed_pos_arg, &min_buffer_size_arg,
          &max_buffer_size_arg, &buffer_size_arg, &field_projection_arg,
          &recovery_arg))) {
    return -1;
  }
  PythonReader::Options python_reader_options;
  python_reader_options.set_owns_src(true);
  if (owns_src_arg != nullptr) {
    const int owns_src_is_true = PyObject_IsTrue(owns_src_arg);
    if (ABSL_PREDICT_FALSE(owns_src_is_true < 0)) return -1;
    python_reader_options.set_owns_src(owns_src_is_true != 0);
  }
  if (assumed_pos_arg != nullptr && assumed_pos_arg != Py_None) {
    const std::optional<Position> assumed_pos =
        PositionFromPython(assumed_pos_arg);
    if (ABSL_PREDICT_FALSE(assumed_pos == std::nullopt)) return -1;
    python_reader_options.set_assumed_pos(*assumed_pos);
  }
  // `buffer_size` is a shorthand for setting both bounds.
  if (buffer_size_arg != nullptr && buffer_size_arg != Py_None) {
    min_buffer_size_arg = buffer_size_arg;
    max_buffer_size_arg = buffer_size_arg;
  }
  if (min_buffer_size_arg != nullptr) {
    const std::optional<size_t> min_buffer_size =
        SizeFromPython(min_buffer_size_arg);
    if (ABSL_PREDICT_FALSE(min_buffer_size == std::nullopt)) return -1;
    python_reader_options.set_min_buffer_size(*min_buffer_size);
  }
  if (max_buffer_size_arg != nullptr) {
    const std::optional<size_t> max_buffer_size =
        SizeFromPython(max_buffer_size_arg);
    if (ABSL_PREDICT_FALSE(max_buffer_size == std::nullopt)) return -1;
    python_reader_options.set_max_buffer_size(*max_buffer_size);
  }
  RecordReaderBase::Options record_reader_options;
  if (field_projection_arg != nullptr && field_projection_arg != Py_None) {
    std::optional<FieldProjection> field_projection =
        FieldProjectionFromPython(field_projection_arg);
    if (ABSL_PREDICT_FALSE(field_projection == std::nullopt)) return -1;
    record_reader_options.set_field_projection(*std::move(field_projection));
  }
  if (recovery_arg != nullptr && recovery_arg != Py_None) {
    Py_INCREF(recovery_arg);
    Py_XDECREF(self->recovery);
    self->recovery = recovery_arg;
    // The C++ recovery callback re-enters Python: it builds a
    // `SkippedRegion` object and calls the user-supplied callable. Any
    // Python exception is saved in `self->recovery_exception` to be
    // re-raised later; `StopIteration` cancels reading without an error.
    record_reader_options.set_recovery([self](
        const SkippedRegion& skipped_region,
        RecordReaderBase& record_reader) {
      PythonLock lock;
      const PythonPtr begin_object = PositionToPython(skipped_region.begin());
      if (ABSL_PREDICT_FALSE(begin_object == nullptr)) {
        self->recovery_exception.emplace(Exception::Fetch());
        return false;
      }
      const PythonPtr end_object = PositionToPython(skipped_region.end());
      if (ABSL_PREDICT_FALSE(end_object == nullptr)) {
        self->recovery_exception.emplace(Exception::Fetch());
        return false;
      }
      const PythonPtr message_object = StringToPython(skipped_region.message());
      if (ABSL_PREDICT_FALSE(message_object == nullptr)) {
        self->recovery_exception.emplace(Exception::Fetch());
        return false;
      }
      static constexpr ImportedConstant kSkippedRegion(
          "riegeli.records.skipped_region", "SkippedRegion");
      if (ABSL_PREDICT_FALSE(!kSkippedRegion.Verify())) {
        self->recovery_exception.emplace(Exception::Fetch());
        return false;
      }
      const PythonPtr skipped_region_object(PyObject_CallFunctionObjArgs(
          kSkippedRegion.get(), begin_object.get(), end_object.get(),
          message_object.get(), nullptr));
      if (ABSL_PREDICT_FALSE(skipped_region_object == nullptr)) {
        self->recovery_exception.emplace(Exception::Fetch());
        return false;
      }
      const PythonPtr recovery_result(PyObject_CallFunctionObjArgs(
          self->recovery, skipped_region_object.get(), nullptr));
      if (ABSL_PREDICT_FALSE(recovery_result == nullptr)) {
        if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
          PyErr_Clear();
        } else {
          self->recovery_exception.emplace(Exception::Fetch());
        }
        return false;
      }
      return true;
    });
  }
  PythonReader python_reader(src_arg, std::move(python_reader_options));
  PythonUnlocked([&] {
    self->record_reader.emplace(std::move(python_reader),
                                std::move(record_reader_options));
  });
  if (ABSL_PREDICT_FALSE(!self->record_reader->ok())) {
    self->record_reader->src().Close();
    SetExceptionFromRecordReader(self);
    return -1;
  }
  return 0;
}
// Getter for the `src` property: the underlying Python file object, or
// `None` if the reader was never initialized or was cleared.
static PyObject* RecordReaderSrc(PyRecordReaderObject* self, void* closure) {
  PyObject* const src = ABSL_PREDICT_FALSE(!self->record_reader.has_value())
                            ? Py_None
                            : self->record_reader->src().src();
  Py_INCREF(src);
  return src;
}
// __repr__(self)
static PyObject* RecordReaderRepr(PyRecordReaderObject* self) {
  // NOTE(review): the format string was lost in extraction; restored to
  // match upstream riegeli — verify against the original file.
  const PythonPtr format = StringToPython("<RecordReader src={!r}>");
  if (ABSL_PREDICT_FALSE(format == nullptr)) return nullptr;
  // return format.format(self.src)
  PyObject* const src = ABSL_PREDICT_FALSE(!self->record_reader.has_value())
                            ? Py_None
                            : self->record_reader->src().src();
  static constexpr Identifier id_format("format");
  return PyObject_CallMethodObjArgs(format.get(), id_format.get(), src,
                                    nullptr);
}
// __enter__(self) -> RecordReader
static PyObject* RecordReaderEnter(PyObject* self, PyObject* args) {
  // return self
  Py_INCREF(self);
  return self;
}
// __exit__(self, exc_type, exc_value, traceback) -> False
static PyObject* RecordReaderExit(PyRecordReaderObject* self, PyObject* args) {
  PyObject* exc_type;
  PyObject* exc_value;
  PyObject* traceback;
  if (ABSL_PREDICT_FALSE(!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type,
                                           &exc_value, &traceback))) {
    return nullptr;
  }
  // self.close(), suppressing exceptions if exc_type != None.
  if (ABSL_PREDICT_TRUE(self->record_reader.has_value())) {
    const bool close_ok =
        PythonUnlocked([&] { return self->record_reader->Close(); });
    if (ABSL_PREDICT_FALSE(!close_ok) && exc_type == Py_None) {
      SetExceptionFromRecordReader(self);
      return nullptr;
    }
  }
  // Returning False never suppresses an in-flight exception.
  Py_RETURN_FALSE;
}
// close(self) -> None
//
// Closes the reader (outside the Python lock); raises on failure. Does
// nothing if the reader was never initialized or was cleared.
static PyObject* RecordReaderClose(PyRecordReaderObject* self, PyObject* args) {
  if (ABSL_PREDICT_TRUE(self->record_reader.has_value())) {
    const bool close_ok =
        PythonUnlocked([&] { return self->record_reader->Close(); });
    if (ABSL_PREDICT_FALSE(!close_ok)) {
      SetExceptionFromRecordReader(self);
      return nullptr;
    }
  }
  Py_RETURN_NONE;
}
// check_file_format(self) -> bool
//
// Returns True if this looks like a Riegeli/records file, False if the file
// ends before this could be determined; raises on failure.
static PyObject* RecordReaderCheckFileFormat(PyRecordReaderObject* self,
                                             PyObject* args) {
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  const bool check_file_format_ok =
      PythonUnlocked([&] { return self->record_reader->CheckFileFormat(); });
  if (ABSL_PREDICT_FALSE(!check_file_format_ok)) {
    if (ABSL_PREDICT_FALSE(RecordReaderHasException(self))) {
      SetExceptionFromRecordReader(self);
      return nullptr;
    }
    Py_RETURN_FALSE;
  }
  Py_RETURN_TRUE;
}
// read_metadata(self) -> RecordsMetadata | None
//
// Reads the serialized file metadata and parses it into a `RecordsMetadata`
// message. Returns None at end of file; raises on failure.
static PyObject* RecordReaderReadMetadata(PyRecordReaderObject* self,
                                          PyObject* args) {
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  Chain metadata;
  const bool read_serialized_metadata_ok = PythonUnlocked(
      [&] { return self->record_reader->ReadSerializedMetadata(metadata); });
  if (ABSL_PREDICT_FALSE(!read_serialized_metadata_ok)) {
    if (ABSL_PREDICT_FALSE(RecordReaderHasException(self))) {
      SetExceptionFromRecordReader(self);
      return nullptr;
    }
    // End of file: no metadata.
    Py_RETURN_NONE;
  }
  const PythonPtr serialized_metadata = ChainToPython(metadata);
  if (ABSL_PREDICT_FALSE(serialized_metadata == nullptr)) return nullptr;
  // return RecordsMetadata.FromString(serialized_metadata)
  static constexpr ImportedConstant kRecordsMetadata(
      "riegeli.records.records_metadata_pb2", "RecordsMetadata");
  if (ABSL_PREDICT_FALSE(!kRecordsMetadata.Verify())) return nullptr;
  static constexpr ImportedConstant kDecodeError("google.protobuf.message",
                                                 "DecodeError");
  if (ABSL_PREDICT_FALSE(!kDecodeError.Verify())) return nullptr;
  static constexpr Identifier id_FromString("FromString");
  PythonPtr metadata_object(
      PyObject_CallMethodObjArgs(kRecordsMetadata.get(), id_FromString.get(),
                                 serialized_metadata.get(), nullptr));
  if (ABSL_PREDICT_FALSE(metadata_object == nullptr)) {
    // A `DecodeError` is recoverable if a recovery function is set: the
    // metadata chunk is reported as a skipped region.
    if (self->record_reader->recovery() != nullptr &&
        PyErr_ExceptionMatches(kDecodeError.get())) {
      const Exception exception = Exception::Fetch();
      if (self->record_reader->recovery()(
              SkippedRegion(self->record_reader->last_pos().chunk_begin(),
                            self->record_reader->pos().numeric(),
                            exception.message()),
              *self->record_reader)) {
        // Recovered metadata decoding, assume empty `RecordsMetadata`.
        return PyObject_CallFunctionObjArgs(kRecordsMetadata.get(), nullptr);
      }
      if (ABSL_PREDICT_FALSE(self->recovery_exception.has_value())) {
        self->recovery_exception->Restore();
        return nullptr;
      }
      Py_RETURN_NONE;
    }
    return nullptr;
  }
  return metadata_object.release();
}
// read_serialized_metadata(self) -> bytes | None
//
// Reads the file metadata without parsing it. Returns None at end of file;
// raises on failure.
static PyObject* RecordReaderReadSerializedMetadata(PyRecordReaderObject* self,
                                                    PyObject* args) {
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  Chain metadata;
  const bool read_serialized_metadata_ok = PythonUnlocked(
      [&] { return self->record_reader->ReadSerializedMetadata(metadata); });
  if (ABSL_PREDICT_FALSE(!read_serialized_metadata_ok)) {
    if (ABSL_PREDICT_FALSE(RecordReaderHasException(self))) {
      SetExceptionFromRecordReader(self);
      return nullptr;
    }
    Py_RETURN_NONE;
  }
  return ChainToPython(metadata).release();
}
// read_record(self) -> bytes | None
//
// Reads the next record. Returns None at end of file; raises on failure.
static PyObject* RecordReaderReadRecord(PyRecordReaderObject* self,
                                        PyObject* args) {
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  Chain record;
  const bool read_record_ok =
      PythonUnlocked([&] { return self->record_reader->ReadRecord(record); });
  if (ABSL_PREDICT_FALSE(!read_record_ok)) {
    if (ABSL_PREDICT_FALSE(RecordReaderHasException(self))) {
      SetExceptionFromRecordReader(self);
      return nullptr;
    }
    Py_RETURN_NONE;
  }
  return ChainToPython(record).release();
}
// read_message(self, message_type) -> message | None
//
// Reads the next record and parses it as `message_type`. A parse failure is
// handed to the recovery function (if set), and on recovery the next record
// is tried. Returns None at end of file; raises on failure.
static PyObject* RecordReaderReadMessage(PyRecordReaderObject* self,
                                         PyObject* args, PyObject* kwargs) {
  static constexpr const char* keywords[] = {"message_type", nullptr};
  PyObject* message_type_arg;
  if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords(
          args, kwargs, "O:read_message", const_cast<char**>(keywords),
          &message_type_arg))) {
    return nullptr;
  }
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  absl::string_view record;
  // Loop so that a recovered parse failure retries with the next record.
  for (;;) {
    const bool read_record_ok =
        PythonUnlocked([&] { return self->record_reader->ReadRecord(record); });
    if (ABSL_PREDICT_FALSE(!read_record_ok)) {
      if (ABSL_PREDICT_FALSE(RecordReaderHasException(self))) {
        SetExceptionFromRecordReader(self);
        return nullptr;
      }
      Py_RETURN_NONE;
    }
    // Expose `record` to Python without copying.
    MemoryView memory_view;
    PyObject* const record_object = memory_view.ToPython(record);
    if (ABSL_PREDICT_FALSE(record_object == nullptr)) return nullptr;
    static constexpr ImportedConstant kDecodeError("google.protobuf.message",
                                                   "DecodeError");
    if (ABSL_PREDICT_FALSE(!kDecodeError.Verify())) return nullptr;
    // return message_type.FromString(record)
    static constexpr Identifier id_FromString("FromString");
    PythonPtr message(PyObject_CallMethodObjArgs(
        message_type_arg, id_FromString.get(), record_object, nullptr));
    if (ABSL_PREDICT_FALSE(message == nullptr)) {
      if (self->record_reader->recovery() != nullptr &&
          PyErr_ExceptionMatches(kDecodeError.get())) {
        const Exception exception = Exception::Fetch();
        if (ABSL_PREDICT_FALSE(!memory_view.Release())) return nullptr;
        if (self->record_reader->recovery()(
                SkippedRegion(self->record_reader->last_pos().numeric(),
                              self->record_reader->pos().numeric(),
                              exception.message()),
                *self->record_reader)) {
          continue;
        }
        if (ABSL_PREDICT_FALSE(self->recovery_exception.has_value())) {
          self->recovery_exception->Restore();
          return nullptr;
        }
        Py_RETURN_NONE;
      }
      return nullptr;
    }
    if (ABSL_PREDICT_FALSE(!memory_view.Release())) return nullptr;
    return message.release();
  }
}
// read_records(self) -> iterator over bytes
//
// Returns an iterator which yields records by calling `read_record()`.
static PyRecordIterObject* RecordReaderReadRecords(PyRecordReaderObject* self,
                                                   PyObject* args) {
  // NOTE(review): the unique_ptr's deleter type was lost in extraction;
  // `Deleter` (Py_DECREF-based, from python/riegeli/base/utils.h) restored
  // to match upstream — verify.
  std::unique_ptr<PyRecordIterObject, Deleter> iter(
      PyObject_GC_New(PyRecordIterObject, &PyRecordIter_Type));
  if (ABSL_PREDICT_FALSE(iter == nullptr)) return nullptr;
  iter->read_record = [](PyRecordReaderObject* self, PyObject* args) {
    return RecordReaderReadRecord(self, args);
  };
  Py_INCREF(self);
  iter->record_reader = self;
  iter->args = nullptr;
  return iter.release();
}
// read_messages(self, message_type) -> iterator over messages
//
// Returns an iterator which yields parsed messages by calling
// `read_message(message_type)`.
static PyRecordIterObject* RecordReaderReadMessages(PyRecordReaderObject* self,
                                                    PyObject* args,
                                                    PyObject* kwargs) {
  static constexpr const char* keywords[] = {"message_type", nullptr};
  PyObject* message_type_arg;
  if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords(
          args, kwargs, "O:read_messages", const_cast<char**>(keywords),
          &message_type_arg))) {
    return nullptr;
  }
  // NOTE(review): deleter type restored to match upstream — verify.
  std::unique_ptr<PyRecordIterObject, Deleter> iter(
      PyObject_GC_New(PyRecordIterObject, &PyRecordIter_Type));
  if (ABSL_PREDICT_FALSE(iter == nullptr)) return nullptr;
  iter->read_record = [](PyRecordReaderObject* self, PyObject* args) {
    return RecordReaderReadMessage(self, args, nullptr);
  };
  Py_INCREF(self);
  iter->record_reader = self;
  iter->args = PyTuple_Pack(1, message_type_arg);
  if (ABSL_PREDICT_FALSE(iter->args == nullptr)) return nullptr;
  return iter.release();
}
// set_field_projection(self, field_projection) -> None
//
// Changes the field projection for subsequently read records; `None` means
// reading all fields. Raises on failure.
static PyObject* RecordReaderSetFieldProjection(PyRecordReaderObject* self,
                                                PyObject* args,
                                                PyObject* kwargs) {
  static constexpr const char* keywords[] = {"field_projection", nullptr};
  PyObject* field_projection_arg;
  if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords(
          args, kwargs, "O:set_field_projection", const_cast<char**>(keywords),
          &field_projection_arg))) {
    return nullptr;
  }
  std::optional<FieldProjection> field_projection;
  if (field_projection_arg == Py_None) {
    field_projection = FieldProjection::All();
  } else {
    field_projection = FieldProjectionFromPython(field_projection_arg);
    if (ABSL_PREDICT_FALSE(field_projection == std::nullopt)) return nullptr;
  }
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  const bool set_field_projection_ok = PythonUnlocked([&] {
    return self->record_reader->SetFieldProjection(
        *std::move(field_projection));
  });
  if (ABSL_PREDICT_FALSE(!set_field_projection_ok)) {
    SetExceptionFromRecordReader(self);
    return nullptr;
  }
  Py_RETURN_NONE;
}
// Getter for the `last_pos` property: the canonical position of the last
// record read. Raises RiegeliError if no record was read yet.
static PyObject* RecordReaderLastPos(PyRecordReaderObject* self,
                                     void* closure) {
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  if (ABSL_PREDICT_FALSE(!kRecordPositionApi.Verify())) return nullptr;
  if (ABSL_PREDICT_FALSE(!self->record_reader->last_record_is_valid())) {
    SetRiegeliError(absl::FailedPreconditionError("No record was read"));
    return nullptr;
  }
  return kRecordPositionApi
      ->RecordPositionToPython(self->record_reader->last_pos())
      .release();
}
// Getter for the `pos` property: the current position of the reader.
static PyObject* RecordReaderPos(PyRecordReaderObject* self, void* closure) {
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  if (ABSL_PREDICT_FALSE(!kRecordPositionApi.Verify())) return nullptr;
  return kRecordPositionApi->RecordPositionToPython(self->record_reader->pos())
      .release();
}
// Getter for the `supports_random_access` property.
static PyObject* RecordReaderSupportsRandomAccess(PyRecordReaderObject* self,
                                                  void* closure) {
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  return PyBool_FromLong(self->record_reader->SupportsRandomAccess());
}
// seek(self, pos: RecordPosition) -> None
//
// Seeks to a position previously obtained from `pos` / `last_pos`.
static PyObject* RecordReaderSeek(PyRecordReaderObject* self, PyObject* args,
                                  PyObject* kwargs) {
  static constexpr const char* keywords[] = {"pos", nullptr};
  PyObject* pos_arg;
  if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords(
          args, kwargs, "O:seek", const_cast<char**>(keywords), &pos_arg))) {
    return nullptr;
  }
  if (ABSL_PREDICT_FALSE(!kRecordPositionApi.Verify())) return nullptr;
  const std::optional<RecordPosition> pos =
      kRecordPositionApi->RecordPositionFromPython(pos_arg);
  if (ABSL_PREDICT_FALSE(pos == std::nullopt)) return nullptr;
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  const bool seek_ok =
      PythonUnlocked([&] { return self->record_reader->Seek(*pos); });
  if (ABSL_PREDICT_FALSE(!seek_ok)) {
    SetExceptionFromRecordReader(self);
    return nullptr;
  }
  Py_RETURN_NONE;
}
// seek_numeric(self, pos: int) -> None
//
// Seeks to a numeric (byte-oriented) position.
static PyObject* RecordReaderSeekNumeric(PyRecordReaderObject* self,
                                         PyObject* args, PyObject* kwargs) {
  static constexpr const char* keywords[] = {"pos", nullptr};
  PyObject* pos_arg;
  if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords(
          args, kwargs, "O:seek_numeric", const_cast<char**>(keywords),
          &pos_arg))) {
    return nullptr;
  }
  const std::optional<Position> pos = PositionFromPython(pos_arg);
  if (ABSL_PREDICT_FALSE(pos == std::nullopt)) return nullptr;
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  const bool seek_ok =
      PythonUnlocked([&] { return self->record_reader->Seek(*pos); });
  if (ABSL_PREDICT_FALSE(!seek_ok)) {
    SetExceptionFromRecordReader(self);
    return nullptr;
  }
  Py_RETURN_NONE;
}
// seek_back(self) -> bool
//
// Seeks back by one record. Returns False if the beginning was reached;
// raises on failure.
static PyObject* RecordReaderSeekBack(PyRecordReaderObject* self,
                                      PyObject* args) {
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  const bool seek_back_ok =
      PythonUnlocked([&] { return self->record_reader->SeekBack(); });
  if (ABSL_PREDICT_FALSE(!seek_back_ok)) {
    if (ABSL_PREDICT_FALSE(RecordReaderHasException(self))) {
      SetExceptionFromRecordReader(self);
      return nullptr;
    }
    Py_RETURN_FALSE;
  }
  Py_RETURN_TRUE;
}
// size(self) -> int
//
// Returns the size of the file; raises on failure.
static PyObject* RecordReaderSize(PyRecordReaderObject* self, PyObject* args) {
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  const std::optional<Position> size =
      PythonUnlocked([&] { return self->record_reader->Size(); });
  if (ABSL_PREDICT_FALSE(size == std::nullopt)) {
    SetExceptionFromRecordReader(self);
    return nullptr;
  }
  return PositionToPython(*size).release();
}
// search(self, test) -> ordering | None
//
// Binary-searches the file; `test(self)` is called at probe positions and
// must return a value convertible to a partial ordering. Exceptions raised
// by `test` are captured and re-raised after the search unwinds.
static PyObject* RecordReaderSearch(PyRecordReaderObject* self, PyObject* args,
                                    PyObject* kwargs) {
  static constexpr const char* keywords[] = {"test", nullptr};
  PyObject* test_arg;
  if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords(
          args, kwargs, "O:search", const_cast<char**>(keywords), &test_arg))) {
    return nullptr;
  }
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  std::optional<Exception> test_exception;
  const std::optional<PartialOrdering> result = PythonUnlocked([&] {
    return self->record_reader->Search(
        [&](RecordReaderBase&) -> std::optional<PartialOrdering> {
          // Re-acquire the Python lock only while calling back into Python.
          PythonLock lock;
          const PythonPtr test_result(
              PyObject_CallFunctionObjArgs(test_arg, self, nullptr));
          if (ABSL_PREDICT_FALSE(test_result == nullptr)) {
            test_exception.emplace(Exception::Fetch());
            return std::nullopt;
          }
          const std::optional<PartialOrdering> ordering =
              PartialOrderingFromPython(test_result.get());
          if (ABSL_PREDICT_FALSE(ordering == std::nullopt)) {
            test_exception.emplace(Exception::Fetch());
            return std::nullopt;
          }
          return *ordering;
        });
  });
  if (ABSL_PREDICT_FALSE(result == std::nullopt)) {
    if (test_exception != std::nullopt) {
      test_exception->Restore();
    } else {
      SetExceptionFromRecordReader(self);
    }
    return nullptr;
  }
  return PartialOrderingToPython(*result).release();
}
// search_for_record(self, test) -> ordering | None
//
// Binary-searches for a record; `test(record_bytes)` must return a value
// convertible to a partial ordering. Raising `StopIteration` from `test`
// cancels the search and returns None.
static PyObject* RecordReaderSearchForRecord(PyRecordReaderObject* self,
                                             PyObject* args, PyObject* kwargs) {
  static constexpr const char* keywords[] = {"test", nullptr};
  PyObject* test_arg;
  if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords(
          args, kwargs, "O:search_for_record", const_cast<char**>(keywords),
          &test_arg))) {
    return nullptr;
  }
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  std::optional<Exception> test_exception;
  const std::optional<PartialOrdering> result = PythonUnlocked([&] {
    return self->record_reader->Search(
        [&](const Chain& record) -> std::optional<PartialOrdering> {
          PythonLock lock;
          const PythonPtr record_object = ChainToPython(record);
          if (ABSL_PREDICT_FALSE(record_object == nullptr)) {
            test_exception.emplace(Exception::Fetch());
            return std::nullopt;
          }
          const PythonPtr test_result(PyObject_CallFunctionObjArgs(
              test_arg, record_object.get(), nullptr));
          if (ABSL_PREDICT_FALSE(test_result == nullptr)) {
            test_exception.emplace(Exception::Fetch());
            return std::nullopt;
          }
          const std::optional<PartialOrdering> ordering =
              PartialOrderingFromPython(test_result.get());
          if (ABSL_PREDICT_FALSE(ordering == std::nullopt)) {
            test_exception.emplace(Exception::Fetch());
            return std::nullopt;
          }
          return *ordering;
        });
  });
  if (ABSL_PREDICT_FALSE(result == std::nullopt)) {
    if (test_exception != std::nullopt) {
      test_exception->Restore();
      // `StopIteration` from `test` cancels the search without an error.
      if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
        PyErr_Clear();
        Py_RETURN_NONE;
      }
    } else {
      SetExceptionFromRecordReader(self);
    }
    return nullptr;
  }
  return PartialOrderingToPython(*result).release();
}
// search_for_message(self, message_type, test) -> ordering | None
//
// Binary-searches for a record parsed as `message_type`;
// `test(message)` must return a value convertible to a partial ordering.
// Unparsable records are handed to the recovery function (if set) and, on
// recovery, declared unordered. Raising `StopIteration` from `test` cancels
// the search and returns None.
static PyObject* RecordReaderSearchForMessage(PyRecordReaderObject* self,
                                              PyObject* args,
                                              PyObject* kwargs) {
  static constexpr const char* keywords[] = {"message_type", "test", nullptr};
  PyObject* message_type_arg;
  PyObject* test_arg;
  if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords(
          args, kwargs, "OO:search_for_message", const_cast<char**>(keywords),
          &message_type_arg, &test_arg))) {
    return nullptr;
  }
  if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr;
  static constexpr ImportedConstant kDecodeError("google.protobuf.message",
                                                 "DecodeError");
  if (ABSL_PREDICT_FALSE(!kDecodeError.Verify())) return nullptr;
  // `RecordReader::Search(test)` sets the recovery function to `nullptr`
  // while calling `test()`. Save it here to call it explicitly in `test()`.
  std::function<bool(const SkippedRegion&, RecordReaderBase&)> recovery =
      self->record_reader->recovery();
  std::optional<Exception> test_exception;
  const std::optional<PartialOrdering> result = PythonUnlocked([&] {
    return self->record_reader->Search(
        [&](absl::string_view record) -> std::optional<PartialOrdering> {
          PythonLock lock;
          // Expose `record` to Python without copying.
          MemoryView memory_view;
          PyObject* const record_object = memory_view.ToPython(record);
          if (ABSL_PREDICT_FALSE(record_object == nullptr)) {
            test_exception.emplace(Exception::Fetch());
            return std::nullopt;
          }
          // message = message_type.FromString(record)
          static constexpr Identifier id_FromString("FromString");
          const PythonPtr message(PyObject_CallMethodObjArgs(
              message_type_arg, id_FromString.get(), record_object, nullptr));
          if (ABSL_PREDICT_FALSE(message == nullptr)) {
            if (recovery != nullptr &&
                PyErr_ExceptionMatches(kDecodeError.get())) {
              const Exception exception = Exception::Fetch();
              if (ABSL_PREDICT_FALSE(!memory_view.Release())) {
                test_exception.emplace(Exception::Fetch());
                return std::nullopt;
              }
              if (recovery(
                      SkippedRegion(self->record_reader->last_pos().numeric(),
                                    self->record_reader->pos().numeric(),
                                    exception.message()),
                      *self->record_reader)) {
                // Declare the skipped record unordered.
                return PartialOrdering::unordered;
              }
              if (ABSL_PREDICT_FALSE(self->recovery_exception.has_value())) {
                return std::nullopt;
              }
              // Cancel the search.
              PyErr_SetNone(PyExc_StopIteration);
            }
            test_exception.emplace(Exception::Fetch());
            return std::nullopt;
          }
          if (ABSL_PREDICT_FALSE(!memory_view.Release())) {
            test_exception.emplace(Exception::Fetch());
            return std::nullopt;
          }
          const PythonPtr test_result(
              PyObject_CallFunctionObjArgs(test_arg, message.get(), nullptr));
          if (ABSL_PREDICT_FALSE(test_result == nullptr)) {
            test_exception.emplace(Exception::Fetch());
            return std::nullopt;
          }
          const std::optional<PartialOrdering> ordering =
              PartialOrderingFromPython(test_result.get());
          if (ABSL_PREDICT_FALSE(ordering == std::nullopt)) {
            test_exception.emplace(Exception::Fetch());
            return std::nullopt;
          }
          return *ordering;
        });
  });
  if (ABSL_PREDICT_FALSE(result == std::nullopt)) {
    if (test_exception != std::nullopt) {
      test_exception->Restore();
      // `StopIteration` from `test` (or a cancelled recovery) ends the
      // search without an error.
      if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
        PyErr_Clear();
        Py_RETURN_NONE;
      }
    } else {
      SetExceptionFromRecordReader(self);
    }
    return nullptr;
  }
  return PartialOrderingToPython(*result).release();
}
} // extern "C"
// Method table for the Python `RecordReader` type.
//
// Handlers registered with `METH_VARARGS` (optionally with `METH_KEYWORDS`)
// do not have the plain `PyCFunction` signature, so they are cast to
// `PyCFunction` for storage in `PyMethodDef::ml_meth`, as the Python C API
// requires. `METH_NOARGS` handlers with the plain signature need no cast.
const PyMethodDef RecordReaderMethods[] = {
    {"__enter__", RecordReaderEnter, METH_NOARGS,
     R"doc(
__enter__(self) -> RecordReader
Returns self.
)doc"},
    {"__exit__", reinterpret_cast<PyCFunction>(RecordReaderExit), METH_VARARGS,
     R"doc(
__exit__(self, exc_type, exc_value, traceback) -> bool
Calls close().
Suppresses exceptions from close() if an exception is already in flight.
Args:
  exc_type: None or exception in flight (type).
  exc_value: None or exception in flight (value).
  traceback: None or exception in flight (traceback).
)doc"},
    {"close", reinterpret_cast<PyCFunction>(RecordReaderClose), METH_NOARGS,
     R"doc(
close(self) -> None
Indicates that reading is done.
Verifies that the file is not truncated at the current position, i.e. that it
either has more data or ends cleanly. Marks the RecordReader as closed,
disallowing further reading.
If the RecordReader was failed, raises the same exception again.
If the RecordReader was not failed but already closed, does nothing.
)doc"},
    {"check_file_format",
     reinterpret_cast<PyCFunction>(RecordReaderCheckFileFormat), METH_NOARGS,
     R"doc(
check_file_format(self) -> bool
Ensures that the file looks like a valid Riegeli/Records file.
Reading functions already check the file format. check_file_format() can verify
the file format before (or instead of) performing other operations.
This ignores the recovery function. If invalid file contents are skipped, then
checking the file format is meaningless: any file can be read.
Returns:
  True if this looks like a Riegeli/records file. False if the file ends before
  this could be determined.
Raises:
  RiegeliError: If this is not a Riegeli/records file.
)doc"},
    {"read_metadata", reinterpret_cast<PyCFunction>(RecordReaderReadMetadata),
     METH_NOARGS, R"doc(
read_metadata(self) -> RecordsMetadata | None
Returns file metadata.
Record type in metadata can be conveniently interpreted by get_record_type().
read_metadata() must be called while the RecordReader is at the beginning of the
file (calling check_file_format() before is allowed).
Returns:
  File metadata as parsed RecordsMetadata message, or None at end of file.
)doc"},
    {"read_serialized_metadata",
     reinterpret_cast<PyCFunction>(RecordReaderReadSerializedMetadata),
     METH_NOARGS, R"doc(
read_serialized_metadata(self) -> bytes | None
Returns file metadata.
This is like read_metadata(), but metadata is returned in the serialized form.
This is faster if the caller needs metadata already serialized.
Returns:
  File metadata as serialized RecordsMetadata message, or None at end of file.
)doc"},
    {"read_record", reinterpret_cast<PyCFunction>(RecordReaderReadRecord),
     METH_NOARGS, R"doc(
read_record(self) -> bytes | None
Reads the next record.
Returns:
  The record read as bytes, or None at end of file.
)doc"},
    {"read_message", reinterpret_cast<PyCFunction>(RecordReaderReadMessage),
     METH_VARARGS | METH_KEYWORDS, R"doc(
read_message(self, message_type: type[Message]) -> Message | None
Reads the next record.
Args:
  message_type: Type of the message to parse the record as.
Returns:
  The record read as a parsed message, or None at end of file.
)doc"},
    {"read_records", reinterpret_cast<PyCFunction>(RecordReaderReadRecords),
     METH_NOARGS, R"doc(
read_records(self) -> Iterator[bytes]
Returns an iterator which reads all remaining records.
Yields:
  The next record read as bytes.
)doc"},
    {"read_messages", reinterpret_cast<PyCFunction>(RecordReaderReadMessages),
     METH_VARARGS | METH_KEYWORDS, R"doc(
read_messages(self, message_type: type[Message]) -> Iterator[Message]
Returns an iterator which reads all remaining records.
Yields:
  The next record read as parsed message.
)doc"},
    {"set_field_projection",
     reinterpret_cast<PyCFunction>(RecordReaderSetFieldProjection),
     METH_VARARGS | METH_KEYWORDS, R"doc(
set_field_projection(
    self, field_projection: Iterable[Iterable[int]] | None
) -> None
Like field_projection constructor argument, but can be done at any time.
Args:
  field_projection: If not None, the set of fields to be included in returned
    records, allowing to exclude the remaining fields (but does not guarantee
    that they will be excluded). Excluding data makes reading faster. Projection
    is effective if the file has been written with "transpose" in RecordWriter
    options. Additionally, "bucket_fraction" in RecordWriter options with a
    lower value can make reading with projection faster. A field projection is
    specified as an iterable of field paths. A field path is specified as an
    iterable of proto field numbers descending from the root message. A special
    field EXISTENCE_ONLY can be added to the end of the path; it preserves
    field existence but ignores its value; warning: for a repeated field this
    preserves the field count only if the field is not packed.
)doc"},
    {"seek", reinterpret_cast<PyCFunction>(RecordReaderSeek),
     METH_VARARGS | METH_KEYWORDS, R"doc(
seek(self, pos: RecordPosition) -> None
Seeks to a position.
The position should have been obtained by pos for the same file.
Args:
  pos: Seek target.
)doc"},
    {"seek_numeric", reinterpret_cast<PyCFunction>(RecordReaderSeekNumeric),
     METH_VARARGS | METH_KEYWORDS, R"doc(
seek_numeric(self, pos: int) -> None
Seeks to a position.
The position can be any integer between 0 and file size. If it points between
records, it is interpreted as the next record.
Args:
  pos: Seek target.
)doc"},
    {"seek_back", reinterpret_cast<PyCFunction>(RecordReaderSeekBack),
     METH_NOARGS, R"doc(
seek_back(self) -> bool
Seeks back by one record.
Returns:
  If successful, True. Returns False at the beginning of the file.
)doc"},
    {"size", reinterpret_cast<PyCFunction>(RecordReaderSize), METH_NOARGS,
     R"doc(
size(self) -> int
Returns the size of the file in bytes.
This is the position corresponding to its end.
)doc"},
    {"search", reinterpret_cast<PyCFunction>(RecordReaderSearch),
     METH_VARARGS | METH_KEYWORDS,
     R"doc(
search(self, test: Callable[[RecordReader], int | None]) -> None
Searches the file for a desired record, or for a desired position between
records, given that it is possible to determine whether a given record is before
or after the desired position.
The current position before calling search() does not matter.
Args:
  test: A function which takes the RecordReader as a parameter, seeked to some
    record, and returns an int or None:
     * < 0: The current record is before the desired position.
     * == 0: The current record is desired, searching can stop.
     * > 0: The current record is after the desired position.
     * None: It could not be determined which is the case. The current record
       will be skipped.
    It can also raise StopIteration to cancel the search.
Preconditions:
 * All < 0 records precede all == 0 records.
 * All == 0 records precede all > 0 records.
 * All < 0 records precede all > 0 records, even if there are no == 0 records.
Return values:
 * 0: There is some == 0 record, and search() points to some such record.
 * 1: There are no == 0 records but there is some > 0 record, and search()
   points to the earliest such record.
 * -1: There are no == 0 nor > 0 records, but there is some < 0 record, and
   search() points to the end of file.
 * None: All records are None, and search() points to the end of file,
   or search() was cancelled.
To find the earliest == 0 record instead of an arbitrary one, test() can be
changed to return > 0 in place of == 0.
Further guarantees:
 * If a test() returns == 0, search() points back to the record before test()
   and returns.
 * If a test() returns < 0, test() will not be called again at earlier
   positions.
 * If a test() returns > 0, test() will not be called again at later positions.
 * test() will not be called again at the same position.
It follows that if a test() returns == 0 or > 0, search() points to the record
before the last test() call with one of these results. This allows to
communicate additional context of a == 0 or > 0 result by a side effect of
test().
)doc"},
    {"search_for_record",
     reinterpret_cast<PyCFunction>(RecordReaderSearchForRecord),
     METH_VARARGS | METH_KEYWORDS,
     R"doc(
search_for_record(self, test: Callable[[bytes], int | None]) -> None
A variant of search() which reads a record before calling test(), instead of
letting test() read the record.
Args:
  test: A function which takes the record read as bytes as a parameter, and
    returns an int or None, like in search().
)doc"},
    {"search_for_message",
     reinterpret_cast<PyCFunction>(RecordReaderSearchForMessage),
     METH_VARARGS | METH_KEYWORDS,
     R"doc(
search_for_message(
    self, message_type: type[Message],
    test: Callable[[Message], int | None]
) -> None
A variant of search() which reads a record before calling test(), instead of
letting test() read the record.
Args:
  message_type: Type of the message to parse the record as.
  test: A function which takes the record read as a parsed message as a
    parameter, and returns an int or None, like in search().
)doc"},
    // Sentinel entry terminating the method table.
    {nullptr, nullptr, 0, nullptr},
};
const PyGetSetDef RecordReaderGetSet[] = {
{const_cast("src"), reinterpret_cast